tu: Add support for a "lazy" sparse VMA
Add an extremely limited form of sparse where zeroing memory is not supported and only one BO can be fully bound to the sparse VMA immediately when it's created. This can be implemented on drm/msm even without VM_BIND, by just reserving the iova range. However kgsl doesn't let us control iova offsets, so we have to use "real" sparse support to implement it. In effect this lets us reserve an iova range and then "lazily" allocate the BO. This will be used for transient allocations in Vulkan when we have to fall back to sysmem. As part of this we add skeleton sparse VMA support to virtio, which is just enough for lazy VMAs. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37151>
This commit is contained in:
parent
93a80f4bb9
commit
88d001383a
8 changed files with 145 additions and 68 deletions
|
|
@ -42,7 +42,7 @@ tu_cmd_buffer_setup_status_tracking(struct tu_device *device)
|
|||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||
TU_BO_ALLOC_INTERNAL_RESOURCE, "cmd_buffer_status");
|
||||
TU_BO_ALLOC_INTERNAL_RESOURCE, NULL, "cmd_buffer_status");
|
||||
if (result != VK_SUCCESS)
|
||||
return NULL;
|
||||
|
||||
|
|
|
|||
|
|
@ -3306,7 +3306,7 @@ tu_AllocateMemory(VkDevice _device,
|
|||
device->physical_device->memory.types[pAllocateInfo->memoryTypeIndex];
|
||||
result = tu_bo_init_new_explicit_iova(
|
||||
device, &mem->vk.base, &mem->bo, pAllocateInfo->allocationSize,
|
||||
client_address, mem_property, alloc_flags, name);
|
||||
client_address, mem_property, alloc_flags, NULL, name);
|
||||
}
|
||||
|
||||
if (result == VK_SUCCESS) {
|
||||
|
|
|
|||
|
|
@ -132,6 +132,8 @@ struct tu_physical_device
|
|||
bool has_sparse;
|
||||
/* Whether TU_SPARSE_VMA_MAP_ZERO can be used. */
|
||||
bool has_sparse_prr;
|
||||
/* Whether lazy allocations are supported. */
|
||||
bool has_lazy_bos;
|
||||
uint64_t va_start;
|
||||
uint64_t va_size;
|
||||
|
||||
|
|
@ -654,7 +656,7 @@ tu_bo_init_new_cached(struct tu_device *dev, struct vk_object_base *base,
|
|||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
|
||||
(dev->physical_device->has_cached_coherent_memory ?
|
||||
VK_MEMORY_PROPERTY_HOST_CACHED_BIT : 0),
|
||||
flags, name);
|
||||
flags, NULL, name);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,9 @@ tu_bo_init_new_explicit_iova(struct tu_device *dev,
|
|||
uint64_t size,
|
||||
uint64_t client_iova,
|
||||
VkMemoryPropertyFlags mem_property,
|
||||
enum tu_bo_alloc_flags flags, const char *name)
|
||||
enum tu_bo_alloc_flags flags,
|
||||
struct tu_sparse_vma *lazy_vma,
|
||||
const char *name)
|
||||
{
|
||||
MESA_TRACE_FUNC();
|
||||
struct tu_instance *instance = dev->physical_device->instance;
|
||||
|
|
@ -44,7 +46,7 @@ tu_bo_init_new_explicit_iova(struct tu_device *dev,
|
|||
|
||||
VkResult result =
|
||||
dev->instance->knl->bo_init(dev, base, out_bo, size, client_iova,
|
||||
mem_property, flags, name);
|
||||
mem_property, flags, lazy_vma, name);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ struct tu_bo {
|
|||
bool implicit_sync : 1;
|
||||
bool never_unmap : 1;
|
||||
bool cached_non_coherent : 1;
|
||||
bool lazy : 1;
|
||||
|
||||
bool dump;
|
||||
|
||||
|
|
@ -67,6 +68,7 @@ struct tu_bo {
|
|||
};
|
||||
|
||||
enum tu_sparse_vma_flags {
|
||||
TU_SPARSE_VMA_NONE = 0,
|
||||
TU_SPARSE_VMA_REPLAYABLE = 1 << 0,
|
||||
|
||||
/* Make unmapped pages in the memory region map to the PRR NULL page. This
|
||||
|
|
@ -120,10 +122,13 @@ struct tu_knl {
|
|||
VkResult (*bo_init)(struct tu_device *dev, struct vk_object_base *base,
|
||||
struct tu_bo **out_bo, uint64_t size, uint64_t client_iova,
|
||||
VkMemoryPropertyFlags mem_property,
|
||||
enum tu_bo_alloc_flags flags, const char *name);
|
||||
enum tu_bo_alloc_flags flags,
|
||||
struct tu_sparse_vma *lazy_vma,
|
||||
const char *name);
|
||||
VkResult (*bo_init_dmabuf)(struct tu_device *dev, struct tu_bo **out_bo,
|
||||
uint64_t size, int prime_fd);
|
||||
int (*bo_export_dmabuf)(struct tu_device *dev, struct tu_bo *bo);
|
||||
VkResult (*bo_alloc_lazy)(struct tu_device *dev, struct tu_bo *bo);
|
||||
VkResult (*bo_map)(struct tu_device *dev, struct tu_bo *bo, void *placed_addr);
|
||||
void (*bo_allow_dump)(struct tu_device *dev, struct tu_bo *bo);
|
||||
void (*bo_finish)(struct tu_device *dev, struct tu_bo *bo);
|
||||
|
|
@ -177,6 +182,7 @@ tu_bo_init_new_explicit_iova(struct tu_device *dev,
|
|||
uint64_t client_iova,
|
||||
VkMemoryPropertyFlags mem_property,
|
||||
enum tu_bo_alloc_flags flags,
|
||||
struct tu_sparse_vma *lazy_vma,
|
||||
const char *name);
|
||||
|
||||
static inline VkResult
|
||||
|
|
@ -189,7 +195,7 @@ tu_bo_init_new(struct tu_device *dev, struct vk_object_base *base,
|
|||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||
flags, name);
|
||||
flags, NULL, name);
|
||||
}
|
||||
|
||||
VkResult
|
||||
|
|
|
|||
|
|
@ -822,13 +822,17 @@ msm_bo_init(struct tu_device *dev,
|
|||
uint64_t client_iova,
|
||||
VkMemoryPropertyFlags mem_property,
|
||||
enum tu_bo_alloc_flags flags,
|
||||
struct tu_sparse_vma *lazy_vma,
|
||||
const char *name)
|
||||
{
|
||||
MESA_TRACE_FUNC();
|
||||
VkResult result;
|
||||
VkResult result = VK_SUCCESS;
|
||||
uint64_t iova;
|
||||
|
||||
result = tu_allocate_iova(dev, 0, size, client_iova, flags, &iova);
|
||||
if (lazy_vma)
|
||||
iova = lazy_vma->msm.iova;
|
||||
else
|
||||
result = tu_allocate_iova(dev, 0, size, client_iova, flags, &iova);
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
|
@ -857,9 +861,11 @@ msm_bo_init(struct tu_device *dev,
|
|||
int ret = drmCommandWriteRead(dev->fd,
|
||||
DRM_MSM_GEM_NEW, &req, sizeof(req));
|
||||
if (ret) {
|
||||
msm_vma_lock(dev);
|
||||
util_vma_heap_free(&dev->vma, iova, size);
|
||||
msm_vma_unlock(dev);
|
||||
if (!lazy_vma) {
|
||||
msm_vma_lock(dev);
|
||||
util_vma_heap_free(&dev->vma, iova, size);
|
||||
msm_vma_unlock(dev);
|
||||
}
|
||||
return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
}
|
||||
|
||||
|
|
@ -877,10 +883,13 @@ msm_bo_init(struct tu_device *dev,
|
|||
TU_RMV(internal_resource_create, dev, bo);
|
||||
TU_RMV(resource_name, dev, bo, name);
|
||||
}
|
||||
bo->lazy = !!lazy_vma;
|
||||
} else {
|
||||
msm_vma_lock(dev);
|
||||
util_vma_heap_free(&dev->vma, iova, size);
|
||||
msm_vma_unlock(dev);
|
||||
if (!lazy_vma) {
|
||||
msm_vma_lock(dev);
|
||||
util_vma_heap_free(&dev->vma, iova, size);
|
||||
msm_vma_unlock(dev);
|
||||
}
|
||||
memset(bo, 0, sizeof(*bo));
|
||||
}
|
||||
|
||||
|
|
@ -1084,12 +1093,14 @@ msm_bo_finish(struct tu_device *dev, struct tu_bo *bo)
|
|||
TU_RMV(bo_destroy, dev, bo);
|
||||
|
||||
if (dev->physical_device->has_vm_bind) {
|
||||
tu_map_vm_bind(dev, MSM_VM_BIND_OP_UNMAP, 0, bo->iova, 0, 0,
|
||||
bo->size);
|
||||
if (!bo->lazy) {
|
||||
tu_map_vm_bind(dev, MSM_VM_BIND_OP_UNMAP, 0, bo->iova, 0, 0,
|
||||
bo->size);
|
||||
|
||||
mtx_lock(&dev->vma_mutex);
|
||||
util_vma_heap_free(&dev->vma, bo->iova, bo->size);
|
||||
mtx_unlock(&dev->vma_mutex);
|
||||
mtx_lock(&dev->vma_mutex);
|
||||
util_vma_heap_free(&dev->vma, bo->iova, bo->size);
|
||||
mtx_unlock(&dev->vma_mutex);
|
||||
}
|
||||
|
||||
msm_bo_gem_close(dev, bo);
|
||||
} else if (dev->physical_device->has_set_iova) {
|
||||
|
|
@ -1119,15 +1130,14 @@ msm_sparse_vma_init(struct tu_device *dev,
|
|||
|
||||
out_vma->msm.size = size;
|
||||
|
||||
mtx_lock(&dev->vma_mutex);
|
||||
result = tu_allocate_userspace_iova(dev, size, client_iova, bo_flags,
|
||||
&out_vma->msm.iova);
|
||||
mtx_unlock(&dev->vma_mutex);
|
||||
result = tu_allocate_iova(dev, 0, size, client_iova, bo_flags,
|
||||
&out_vma->msm.iova);
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
if (flags & TU_SPARSE_VMA_MAP_ZERO) {
|
||||
assert(dev->physical_device->has_vm_bind);
|
||||
result = tu_map_vm_bind(dev, MSM_VM_BIND_OP_MAP_NULL, 0,
|
||||
out_vma->msm.iova, 0, 0, size);
|
||||
}
|
||||
|
|
@ -1141,8 +1151,10 @@ static void
|
|||
msm_sparse_vma_finish(struct tu_device *dev,
|
||||
struct tu_sparse_vma *vma)
|
||||
{
|
||||
tu_map_vm_bind(dev, MSM_VM_BIND_OP_UNMAP, 0, vma->msm.iova, 0, 0,
|
||||
vma->msm.size);
|
||||
if (dev->physical_device->has_vm_bind) {
|
||||
tu_map_vm_bind(dev, MSM_VM_BIND_OP_UNMAP, 0, vma->msm.iova, 0, 0,
|
||||
vma->msm.size);
|
||||
}
|
||||
|
||||
mtx_lock(&dev->vma_mutex);
|
||||
util_vma_heap_free(&dev->vma, vma->msm.iova, vma->msm.size);
|
||||
|
|
@ -1549,6 +1561,7 @@ tu_knl_drm_msm_load(struct tu_instance *instance,
|
|||
|
||||
device->has_set_iova = !tu_drm_get_va_prop(device, &device->va_start,
|
||||
&device->va_size);
|
||||
device->has_lazy_bos = device->has_set_iova;
|
||||
device->has_raytracing = tu_drm_get_raytracing(device);
|
||||
device->has_sparse_prr = tu_drm_get_prr(device);
|
||||
|
||||
|
|
|
|||
|
|
@ -678,6 +678,7 @@ virtio_bo_init(struct tu_device *dev,
|
|||
uint64_t client_iova,
|
||||
VkMemoryPropertyFlags mem_property,
|
||||
enum tu_bo_alloc_flags flags,
|
||||
struct tu_sparse_vma *lazy_vma,
|
||||
const char *name)
|
||||
{
|
||||
MESA_TRACE_FUNC();
|
||||
|
|
@ -686,7 +687,7 @@ virtio_bo_init(struct tu_device *dev,
|
|||
.hdr = MSM_CCMD(GEM_NEW, sizeof(req)),
|
||||
.size = size,
|
||||
};
|
||||
VkResult result;
|
||||
VkResult result = VK_SUCCESS;
|
||||
uint32_t res_id;
|
||||
struct tu_bo *bo;
|
||||
|
||||
|
|
@ -716,10 +717,14 @@ virtio_bo_init(struct tu_device *dev,
|
|||
|
||||
assert(!(flags & TU_BO_ALLOC_DMABUF));
|
||||
|
||||
mtx_lock(&dev->vma_mutex);
|
||||
result = virtio_allocate_userspace_iova_locked(dev, 0, size, client_iova,
|
||||
flags, &req.iova);
|
||||
mtx_unlock(&dev->vma_mutex);
|
||||
if (lazy_vma) {
|
||||
req.iova = lazy_vma->msm.iova;
|
||||
} else {
|
||||
mtx_lock(&dev->vma_mutex);
|
||||
result = virtio_allocate_userspace_iova_locked(dev, 0, size, client_iova,
|
||||
flags, &req.iova);
|
||||
mtx_unlock(&dev->vma_mutex);
|
||||
}
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
|
@ -910,6 +915,45 @@ virtio_bo_finish(struct tu_device *dev, struct tu_bo *bo)
|
|||
u_rwlock_rdunlock(&dev->dma_bo_lock);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
virtio_sparse_vma_init(struct tu_device *dev,
|
||||
struct vk_object_base *base,
|
||||
struct tu_sparse_vma *out_vma,
|
||||
uint64_t *out_iova,
|
||||
enum tu_sparse_vma_flags flags,
|
||||
uint64_t size, uint64_t client_iova)
|
||||
{
|
||||
VkResult result;
|
||||
enum tu_bo_alloc_flags bo_flags =
|
||||
(flags & TU_SPARSE_VMA_REPLAYABLE) ? TU_BO_ALLOC_REPLAYABLE :
|
||||
(enum tu_bo_alloc_flags)0;
|
||||
|
||||
out_vma->msm.size = size;
|
||||
|
||||
mtx_lock(&dev->vma_mutex);
|
||||
result = virtio_allocate_userspace_iova_locked(dev, 0, size, client_iova,
|
||||
bo_flags, &out_vma->msm.iova);
|
||||
mtx_unlock(&dev->vma_mutex);
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
assert(!(flags & TU_SPARSE_VMA_MAP_ZERO));
|
||||
|
||||
*out_iova = out_vma->msm.iova;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static void
|
||||
virtio_sparse_vma_finish(struct tu_device *dev,
|
||||
struct tu_sparse_vma *vma)
|
||||
{
|
||||
mtx_lock(&dev->vma_mutex);
|
||||
util_vma_heap_free(&dev->vma, vma->msm.iova, vma->msm.size);
|
||||
mtx_unlock(&dev->vma_mutex);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
setup_fence_cmds(struct tu_device *dev)
|
||||
{
|
||||
|
|
@ -1156,6 +1200,8 @@ static const struct tu_knl virtio_knl_funcs = {
|
|||
.submit_add_entries = msm_submit_add_entries,
|
||||
.queue_submit = virtio_queue_submit,
|
||||
.queue_wait_fence = virtio_queue_wait_fence,
|
||||
.sparse_vma_init = virtio_sparse_vma_init,
|
||||
.sparse_vma_finish = virtio_sparse_vma_finish,
|
||||
};
|
||||
|
||||
VkResult
|
||||
|
|
@ -1282,6 +1328,7 @@ tu_knl_drm_virtio_load(struct tu_instance *instance,
|
|||
device->va_size = caps.u.msm.va_size;
|
||||
device->ubwc_config.highest_bank_bit = caps.u.msm.highest_bank_bit;
|
||||
device->has_set_iova = true;
|
||||
device->has_lazy_bos = true;
|
||||
device->has_preemption = has_preemption;
|
||||
device->uche_trap_base = uche_trap_base;
|
||||
|
||||
|
|
|
|||
|
|
@ -197,6 +197,39 @@ kgsl_bo_user_map(struct tu_device *dev, struct tu_bo *bo, uint64_t client_iova)
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
kgsl_sparse_vma_map(struct tu_device *dev,
|
||||
struct tu_sparse_vma *vma,
|
||||
struct tu_bo *bo, uint64_t bo_offset)
|
||||
{
|
||||
struct kgsl_gpumem_bind_range range = {
|
||||
.child_offset = bo_offset,
|
||||
.target_offset = 0,
|
||||
.length = vma->kgsl.virtual_bo->size,
|
||||
.child_id = bo->gem_handle,
|
||||
.op = KGSL_GPUMEM_RANGE_OP_BIND,
|
||||
};
|
||||
|
||||
struct kgsl_gpumem_bind_ranges req = {
|
||||
.ranges = (uint64_t)(uintptr_t)&range,
|
||||
.ranges_nents = 1,
|
||||
.ranges_size = sizeof(range),
|
||||
.id = vma->kgsl.virtual_bo->gem_handle,
|
||||
.flags = 0,
|
||||
};
|
||||
|
||||
int ret;
|
||||
|
||||
ret = safe_ioctl(dev->physical_device->local_fd,
|
||||
IOCTL_KGSL_GPUMEM_BIND_RANGES, &req);
|
||||
if (ret) {
|
||||
return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
|
||||
"GPUMEM_BIND_RANGES failed (%s)", strerror(errno));
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
kgsl_bo_init(struct tu_device *dev,
|
||||
struct vk_object_base *base,
|
||||
|
|
@ -205,6 +238,7 @@ kgsl_bo_init(struct tu_device *dev,
|
|||
uint64_t client_iova,
|
||||
VkMemoryPropertyFlags mem_property,
|
||||
enum tu_bo_alloc_flags flags,
|
||||
struct tu_sparse_vma *lazy_vma,
|
||||
const char *name)
|
||||
{
|
||||
if (flags & TU_BO_ALLOC_SHAREABLE) {
|
||||
|
|
@ -269,12 +303,17 @@ kgsl_bo_init(struct tu_device *dev,
|
|||
.base = base,
|
||||
};
|
||||
|
||||
if (flags & TU_BO_ALLOC_REPLAYABLE) {
|
||||
VkResult result = kgsl_bo_user_map(dev, bo, client_iova);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
if (lazy_vma) {
|
||||
result = kgsl_sparse_vma_map(dev, lazy_vma, bo, 0);
|
||||
} else if (flags & TU_BO_ALLOC_REPLAYABLE) {
|
||||
result = kgsl_bo_user_map(dev, bo, client_iova);
|
||||
}
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
tu_dump_bo_init(dev, bo);
|
||||
|
||||
*out_bo = bo;
|
||||
|
|
@ -478,39 +517,6 @@ kgsl_sparse_vma_init(struct tu_device *dev,
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
kgsl_sparse_vma_map(struct tu_device *dev,
|
||||
struct tu_sparse_vma *vma,
|
||||
struct tu_bo *bo, uint64_t bo_offset)
|
||||
{
|
||||
struct kgsl_gpumem_bind_range range = {
|
||||
.child_offset = bo_offset,
|
||||
.target_offset = 0,
|
||||
.length = vma->kgsl.virtual_bo->size,
|
||||
.child_id = bo->gem_handle,
|
||||
.op = KGSL_GPUMEM_RANGE_OP_BIND,
|
||||
};
|
||||
|
||||
struct kgsl_gpumem_bind_ranges req = {
|
||||
.ranges = (uint64_t)(uintptr_t)&range,
|
||||
.ranges_nents = 1,
|
||||
.ranges_size = sizeof(range),
|
||||
.id = vma->kgsl.virtual_bo->gem_handle,
|
||||
.flags = 0,
|
||||
};
|
||||
|
||||
int ret;
|
||||
|
||||
ret = safe_ioctl(dev->physical_device->local_fd,
|
||||
IOCTL_KGSL_GPUMEM_BIND_RANGES, &req);
|
||||
if (ret) {
|
||||
return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
|
||||
"GPUMEM_BIND_RANGES failed (%s)", strerror(errno));
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
kgsl_sparse_vma_finish(struct tu_device *dev,
|
||||
struct tu_sparse_vma *vma)
|
||||
|
|
@ -1809,6 +1815,7 @@ tu_knl_kgsl_load(struct tu_instance *instance, int fd)
|
|||
|
||||
device->has_sparse = kgsl_is_virtual_bo_supported(fd);
|
||||
device->has_sparse_prr = device->has_sparse;
|
||||
device->has_lazy_bos = device->has_sparse;
|
||||
get_kgsl_prop(fd, KGSL_PROP_GPU_VA64_SIZE, &device->va_size,
|
||||
sizeof(device->va_size));
|
||||
/* We don't actually use the VMA, but set a fake offset so that it doesn't
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue