/*
 * Patch summary (preamble text; everything from the first "diff --git"
 * header onwards is unchanged).
 *
 * What the hunks in this patch show:
 *  - tu_bo_init_new_explicit_iova() and the tu_knl::bo_init hook gain a
 *    "struct tu_sparse_vma *lazy_vma" parameter placed before "name".
 *    The callers visible here (command-buffer status tracking,
 *    tu_AllocateMemory, tu_bo_init_new, tu_bo_init_new_cached) pass NULL.
 *  - struct tu_physical_device gains "has_lazy_bos", struct tu_bo gains a
 *    "lazy" bit, enum tu_sparse_vma_flags gains TU_SPARSE_VMA_NONE = 0 and
 *    struct tu_knl gains a "bo_alloc_lazy" hook slot.
 *  - With a non-NULL lazy_vma, msm_bo_init() and virtio_bo_init() take the
 *    iova from lazy_vma->msm.iova instead of allocating one, and
 *    msm_bo_init() skips util_vma_heap_free() on its failure paths.
 *    kgsl_bo_init() instead calls kgsl_sparse_vma_map(dev, lazy_vma, bo, 0).
 *  - The virtio backend gains sparse_vma_init and sparse_vma_finish hooks.
 *
 * NOTE(review): bo->lazy is assigned only in msm_bo_init() in this patch;
 *   the virtio and kgsl bo_init hunks do not set it. Confirm that their
 *   bo_finish paths do not release address space owned by the sparse VMA.
 * NOTE(review): in msm_bo_finish() the "!bo->lazy" guard is added only
 *   inside the has_vm_bind branch, while the msm loader sets has_lazy_bos
 *   from has_set_iova. Confirm the "else if (has_set_iova)" branch is safe
 *   for lazy BOs.
 * NOTE(review): no backend function table in this patch assigns
 *   bo_alloc_lazy. Presumably it arrives in a follow-up change; confirm
 *   that nothing calls it unconditionally before then.
 * NOTE(review): kgsl_bo_init() returns directly when kgsl_sparse_vma_map()
 *   or kgsl_bo_user_map() fails, and the hunk shows no release of "bo" on
 *   that path. Verify against the full function.
 * NOTE(review): virtio_sparse_vma_init() asserts that
 *   TU_SPARSE_VMA_MAP_ZERO is not requested, and its "base" parameter is
 *   not referenced in the body shown.
 */
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 7e016e8d8ce..a405fc1de76 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -42,7 +42,7 @@ tu_cmd_buffer_setup_status_tracking(struct tu_device *device) VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - TU_BO_ALLOC_INTERNAL_RESOURCE, "cmd_buffer_status"); + TU_BO_ALLOC_INTERNAL_RESOURCE, NULL, "cmd_buffer_status"); if (result != VK_SUCCESS) return NULL; diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 6600bb8940c..cb6ba0439e3 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -3306,7 +3306,7 @@ tu_AllocateMemory(VkDevice _device, device->physical_device->memory.types[pAllocateInfo->memoryTypeIndex]; result = tu_bo_init_new_explicit_iova( device, &mem->vk.base, &mem->bo, pAllocateInfo->allocationSize, - client_address, mem_property, alloc_flags, name); + client_address, mem_property, alloc_flags, NULL, name); } if (result == VK_SUCCESS) { diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h index 02d8b7e9227..03294791f7a 100644 --- a/src/freedreno/vulkan/tu_device.h +++ b/src/freedreno/vulkan/tu_device.h @@ -132,6 +132,8 @@ struct tu_physical_device bool has_sparse; /* Whether TU_SPARSE_VMA_MAP_ZERO can be used. */ bool has_sparse_prr; + /* Whether lazy allocations are supported. */ + bool has_lazy_bos; uint64_t va_start; uint64_t va_size; @@ -654,7 +656,7 @@ tu_bo_init_new_cached(struct tu_device *dev, struct vk_object_base *base, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | (dev->physical_device->has_cached_coherent_memory ? 
VK_MEMORY_PROPERTY_HOST_CACHED_BIT : 0), - flags, name); + flags, NULL, name); } diff --git a/src/freedreno/vulkan/tu_knl.cc b/src/freedreno/vulkan/tu_knl.cc index d29fe9bcab4..5b8a0a31626 100644 --- a/src/freedreno/vulkan/tu_knl.cc +++ b/src/freedreno/vulkan/tu_knl.cc @@ -35,7 +35,9 @@ tu_bo_init_new_explicit_iova(struct tu_device *dev, uint64_t size, uint64_t client_iova, VkMemoryPropertyFlags mem_property, - enum tu_bo_alloc_flags flags, const char *name) + enum tu_bo_alloc_flags flags, + struct tu_sparse_vma *lazy_vma, + const char *name) { MESA_TRACE_FUNC(); struct tu_instance *instance = dev->physical_device->instance; @@ -44,7 +46,7 @@ tu_bo_init_new_explicit_iova(struct tu_device *dev, VkResult result = dev->instance->knl->bo_init(dev, base, out_bo, size, client_iova, - mem_property, flags, name); + mem_property, flags, lazy_vma, name); if (result != VK_SUCCESS) return result; diff --git a/src/freedreno/vulkan/tu_knl.h b/src/freedreno/vulkan/tu_knl.h index edaca35f3e9..60373b7bbce 100644 --- a/src/freedreno/vulkan/tu_knl.h +++ b/src/freedreno/vulkan/tu_knl.h @@ -57,6 +57,7 @@ struct tu_bo { bool implicit_sync : 1; bool never_unmap : 1; bool cached_non_coherent : 1; + bool lazy : 1; bool dump; @@ -67,6 +68,7 @@ struct tu_bo { }; enum tu_sparse_vma_flags { + TU_SPARSE_VMA_NONE = 0, TU_SPARSE_VMA_REPLAYABLE = 1 << 0, /* Make unmapped pages in the memory region map to the PRR NULL page. 
This @@ -120,10 +122,13 @@ struct tu_knl { VkResult (*bo_init)(struct tu_device *dev, struct vk_object_base *base, struct tu_bo **out_bo, uint64_t size, uint64_t client_iova, VkMemoryPropertyFlags mem_property, - enum tu_bo_alloc_flags flags, const char *name); + enum tu_bo_alloc_flags flags, + struct tu_sparse_vma *lazy_vma, + const char *name); VkResult (*bo_init_dmabuf)(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size, int prime_fd); int (*bo_export_dmabuf)(struct tu_device *dev, struct tu_bo *bo); + VkResult (*bo_alloc_lazy)(struct tu_device *dev, struct tu_bo *bo); VkResult (*bo_map)(struct tu_device *dev, struct tu_bo *bo, void *placed_addr); void (*bo_allow_dump)(struct tu_device *dev, struct tu_bo *bo); void (*bo_finish)(struct tu_device *dev, struct tu_bo *bo); @@ -177,6 +182,7 @@ tu_bo_init_new_explicit_iova(struct tu_device *dev, uint64_t client_iova, VkMemoryPropertyFlags mem_property, enum tu_bo_alloc_flags flags, + struct tu_sparse_vma *lazy_vma, const char *name); static inline VkResult @@ -189,7 +195,7 @@ tu_bo_init_new(struct tu_device *dev, struct vk_object_base *base, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - flags, name); + flags, NULL, name); } VkResult diff --git a/src/freedreno/vulkan/tu_knl_drm_msm.cc b/src/freedreno/vulkan/tu_knl_drm_msm.cc index 59d30540719..8c69dc6d37a 100644 --- a/src/freedreno/vulkan/tu_knl_drm_msm.cc +++ b/src/freedreno/vulkan/tu_knl_drm_msm.cc @@ -822,13 +822,17 @@ msm_bo_init(struct tu_device *dev, uint64_t client_iova, VkMemoryPropertyFlags mem_property, enum tu_bo_alloc_flags flags, + struct tu_sparse_vma *lazy_vma, const char *name) { MESA_TRACE_FUNC(); - VkResult result; + VkResult result = VK_SUCCESS; uint64_t iova; - result = tu_allocate_iova(dev, 0, size, client_iova, flags, &iova); + if (lazy_vma) + iova = lazy_vma->msm.iova; + else + result = tu_allocate_iova(dev, 0, size, client_iova, flags, &iova); if (result != 
VK_SUCCESS) return result; @@ -857,9 +861,11 @@ msm_bo_init(struct tu_device *dev, int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req, sizeof(req)); if (ret) { - msm_vma_lock(dev); - util_vma_heap_free(&dev->vma, iova, size); - msm_vma_unlock(dev); + if (!lazy_vma) { + msm_vma_lock(dev); + util_vma_heap_free(&dev->vma, iova, size); + msm_vma_unlock(dev); + } return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); } @@ -877,10 +883,13 @@ msm_bo_init(struct tu_device *dev, TU_RMV(internal_resource_create, dev, bo); TU_RMV(resource_name, dev, bo, name); } + bo->lazy = !!lazy_vma; } else { - msm_vma_lock(dev); - util_vma_heap_free(&dev->vma, iova, size); - msm_vma_unlock(dev); + if (!lazy_vma) { + msm_vma_lock(dev); + util_vma_heap_free(&dev->vma, iova, size); + msm_vma_unlock(dev); + } memset(bo, 0, sizeof(*bo)); } @@ -1084,12 +1093,14 @@ msm_bo_finish(struct tu_device *dev, struct tu_bo *bo) TU_RMV(bo_destroy, dev, bo); if (dev->physical_device->has_vm_bind) { - tu_map_vm_bind(dev, MSM_VM_BIND_OP_UNMAP, 0, bo->iova, 0, 0, - bo->size); + if (!bo->lazy) { + tu_map_vm_bind(dev, MSM_VM_BIND_OP_UNMAP, 0, bo->iova, 0, 0, + bo->size); - mtx_lock(&dev->vma_mutex); - util_vma_heap_free(&dev->vma, bo->iova, bo->size); - mtx_unlock(&dev->vma_mutex); + mtx_lock(&dev->vma_mutex); + util_vma_heap_free(&dev->vma, bo->iova, bo->size); + mtx_unlock(&dev->vma_mutex); + } msm_bo_gem_close(dev, bo); } else if (dev->physical_device->has_set_iova) { @@ -1119,15 +1130,14 @@ msm_sparse_vma_init(struct tu_device *dev, out_vma->msm.size = size; - mtx_lock(&dev->vma_mutex); - result = tu_allocate_userspace_iova(dev, size, client_iova, bo_flags, - &out_vma->msm.iova); - mtx_unlock(&dev->vma_mutex); + result = tu_allocate_iova(dev, 0, size, client_iova, bo_flags, + &out_vma->msm.iova); if (result != VK_SUCCESS) return result; if (flags & TU_SPARSE_VMA_MAP_ZERO) { + assert(dev->physical_device->has_vm_bind); result = tu_map_vm_bind(dev, MSM_VM_BIND_OP_MAP_NULL, 0, out_vma->msm.iova, 0, 0, 
size); } @@ -1141,8 +1151,10 @@ static void msm_sparse_vma_finish(struct tu_device *dev, struct tu_sparse_vma *vma) { - tu_map_vm_bind(dev, MSM_VM_BIND_OP_UNMAP, 0, vma->msm.iova, 0, 0, - vma->msm.size); + if (dev->physical_device->has_vm_bind) { + tu_map_vm_bind(dev, MSM_VM_BIND_OP_UNMAP, 0, vma->msm.iova, 0, 0, + vma->msm.size); + } mtx_lock(&dev->vma_mutex); util_vma_heap_free(&dev->vma, vma->msm.iova, vma->msm.size); @@ -1549,6 +1561,7 @@ tu_knl_drm_msm_load(struct tu_instance *instance, device->has_set_iova = !tu_drm_get_va_prop(device, &device->va_start, &device->va_size); + device->has_lazy_bos = device->has_set_iova; device->has_raytracing = tu_drm_get_raytracing(device); device->has_sparse_prr = tu_drm_get_prr(device); diff --git a/src/freedreno/vulkan/tu_knl_drm_virtio.cc b/src/freedreno/vulkan/tu_knl_drm_virtio.cc index dc5d80b6733..4cbb0c51638 100644 --- a/src/freedreno/vulkan/tu_knl_drm_virtio.cc +++ b/src/freedreno/vulkan/tu_knl_drm_virtio.cc @@ -678,6 +678,7 @@ virtio_bo_init(struct tu_device *dev, uint64_t client_iova, VkMemoryPropertyFlags mem_property, enum tu_bo_alloc_flags flags, + struct tu_sparse_vma *lazy_vma, const char *name) { MESA_TRACE_FUNC(); @@ -686,7 +687,7 @@ virtio_bo_init(struct tu_device *dev, .hdr = MSM_CCMD(GEM_NEW, sizeof(req)), .size = size, }; - VkResult result; + VkResult result = VK_SUCCESS; uint32_t res_id; struct tu_bo *bo; @@ -716,10 +717,14 @@ virtio_bo_init(struct tu_device *dev, assert(!(flags & TU_BO_ALLOC_DMABUF)); - mtx_lock(&dev->vma_mutex); - result = virtio_allocate_userspace_iova_locked(dev, 0, size, client_iova, - flags, &req.iova); - mtx_unlock(&dev->vma_mutex); + if (lazy_vma) { + req.iova = lazy_vma->msm.iova; + } else { + mtx_lock(&dev->vma_mutex); + result = virtio_allocate_userspace_iova_locked(dev, 0, size, client_iova, + flags, &req.iova); + mtx_unlock(&dev->vma_mutex); + } if (result != VK_SUCCESS) return result; @@ -910,6 +915,45 @@ virtio_bo_finish(struct tu_device *dev, struct tu_bo *bo) 
u_rwlock_rdunlock(&dev->dma_bo_lock); } +static VkResult +virtio_sparse_vma_init(struct tu_device *dev, + struct vk_object_base *base, + struct tu_sparse_vma *out_vma, + uint64_t *out_iova, + enum tu_sparse_vma_flags flags, + uint64_t size, uint64_t client_iova) +{ + VkResult result; + enum tu_bo_alloc_flags bo_flags = + (flags & TU_SPARSE_VMA_REPLAYABLE) ? TU_BO_ALLOC_REPLAYABLE : + (enum tu_bo_alloc_flags)0; + + out_vma->msm.size = size; + + mtx_lock(&dev->vma_mutex); + result = virtio_allocate_userspace_iova_locked(dev, 0, size, client_iova, + bo_flags, &out_vma->msm.iova); + mtx_unlock(&dev->vma_mutex); + + if (result != VK_SUCCESS) + return result; + + assert(!(flags & TU_SPARSE_VMA_MAP_ZERO)); + + *out_iova = out_vma->msm.iova; + + return result; +} + +static void +virtio_sparse_vma_finish(struct tu_device *dev, + struct tu_sparse_vma *vma) +{ + mtx_lock(&dev->vma_mutex); + util_vma_heap_free(&dev->vma, vma->msm.iova, vma->msm.size); + mtx_unlock(&dev->vma_mutex); +} + static VkResult setup_fence_cmds(struct tu_device *dev) { @@ -1156,6 +1200,8 @@ static const struct tu_knl virtio_knl_funcs = { .submit_add_entries = msm_submit_add_entries, .queue_submit = virtio_queue_submit, .queue_wait_fence = virtio_queue_wait_fence, + .sparse_vma_init = virtio_sparse_vma_init, + .sparse_vma_finish = virtio_sparse_vma_finish, }; VkResult @@ -1282,6 +1328,7 @@ tu_knl_drm_virtio_load(struct tu_instance *instance, device->va_size = caps.u.msm.va_size; device->ubwc_config.highest_bank_bit = caps.u.msm.highest_bank_bit; device->has_set_iova = true; + device->has_lazy_bos = true; device->has_preemption = has_preemption; device->uche_trap_base = uche_trap_base; diff --git a/src/freedreno/vulkan/tu_knl_kgsl.cc b/src/freedreno/vulkan/tu_knl_kgsl.cc index 28b5acff0cd..a16a5f9207d 100644 --- a/src/freedreno/vulkan/tu_knl_kgsl.cc +++ b/src/freedreno/vulkan/tu_knl_kgsl.cc @@ -197,6 +197,39 @@ kgsl_bo_user_map(struct tu_device *dev, struct tu_bo *bo, uint64_t client_iova) return 
VK_SUCCESS; } +static VkResult +kgsl_sparse_vma_map(struct tu_device *dev, + struct tu_sparse_vma *vma, + struct tu_bo *bo, uint64_t bo_offset) +{ + struct kgsl_gpumem_bind_range range = { + .child_offset = bo_offset, + .target_offset = 0, + .length = vma->kgsl.virtual_bo->size, + .child_id = bo->gem_handle, + .op = KGSL_GPUMEM_RANGE_OP_BIND, + }; + + struct kgsl_gpumem_bind_ranges req = { + .ranges = (uint64_t)(uintptr_t)&range, + .ranges_nents = 1, + .ranges_size = sizeof(range), + .id = vma->kgsl.virtual_bo->gem_handle, + .flags = 0, + }; + + int ret; + + ret = safe_ioctl(dev->physical_device->local_fd, + IOCTL_KGSL_GPUMEM_BIND_RANGES, &req); + if (ret) { + return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY, + "GPUMEM_BIND_RANGES failed (%s)", strerror(errno)); + } + + return VK_SUCCESS; +} + static VkResult kgsl_bo_init(struct tu_device *dev, struct vk_object_base *base, @@ -205,6 +238,7 @@ kgsl_bo_init(struct tu_device *dev, uint64_t client_iova, VkMemoryPropertyFlags mem_property, enum tu_bo_alloc_flags flags, + struct tu_sparse_vma *lazy_vma, const char *name) { if (flags & TU_BO_ALLOC_SHAREABLE) { @@ -269,12 +303,17 @@ kgsl_bo_init(struct tu_device *dev, .base = base, }; - if (flags & TU_BO_ALLOC_REPLAYABLE) { - VkResult result = kgsl_bo_user_map(dev, bo, client_iova); - if (result != VK_SUCCESS) - return result; + VkResult result = VK_SUCCESS; + + if (lazy_vma) { + result = kgsl_sparse_vma_map(dev, lazy_vma, bo, 0); + } else if (flags & TU_BO_ALLOC_REPLAYABLE) { + result = kgsl_bo_user_map(dev, bo, client_iova); } + if (result != VK_SUCCESS) + return result; + tu_dump_bo_init(dev, bo); *out_bo = bo; @@ -478,39 +517,6 @@ kgsl_sparse_vma_init(struct tu_device *dev, return VK_SUCCESS; } -static VkResult -kgsl_sparse_vma_map(struct tu_device *dev, - struct tu_sparse_vma *vma, - struct tu_bo *bo, uint64_t bo_offset) -{ - struct kgsl_gpumem_bind_range range = { - .child_offset = bo_offset, - .target_offset = 0, - .length = vma->kgsl.virtual_bo->size, - 
.child_id = bo->gem_handle, - .op = KGSL_GPUMEM_RANGE_OP_BIND, - }; - - struct kgsl_gpumem_bind_ranges req = { - .ranges = (uint64_t)(uintptr_t)&range, - .ranges_nents = 1, - .ranges_size = sizeof(range), - .id = vma->kgsl.virtual_bo->gem_handle, - .flags = 0, - }; - - int ret; - - ret = safe_ioctl(dev->physical_device->local_fd, - IOCTL_KGSL_GPUMEM_BIND_RANGES, &req); - if (ret) { - return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY, - "GPUMEM_BIND_RANGES failed (%s)", strerror(errno)); - } - - return VK_SUCCESS; -} - static void kgsl_sparse_vma_finish(struct tu_device *dev, struct tu_sparse_vma *vma) @@ -1809,6 +1815,7 @@ tu_knl_kgsl_load(struct tu_instance *instance, int fd) device->has_sparse = kgsl_is_virtual_bo_supported(fd); device->has_sparse_prr = device->has_sparse; + device->has_lazy_bos = device->has_sparse; get_kgsl_prop(fd, KGSL_PROP_GPU_VA64_SIZE, &device->va_size, sizeof(device->va_size)); /* We don't actually use the VMA, but set a fake offset so that it doesn't