st/mesa: optimize glCopyImageSubData for 3D and array textures

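radeonsi is significantly faster (about 2-10x) when it performs a single 3D
copy instead of copying each layer separately. The 3D path is used only when
the driver sets pipe_caps.blit_3d and neither image is a cubemap; otherwise
the layers are still copied one at a time.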

Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40071>
This commit is contained in:
Marek Olšák 2026-02-23 12:34:27 -05:00 committed by Marge Bot
parent 03d2cc2b2a
commit 857f5a8e9c
6 changed files with 48 additions and 32 deletions

View file

@ -677,6 +677,7 @@ Capability about the features and limits of the driver/GPU.
* ``pipe_caps.clear_masked``: Whether ``clear`` can accept a color_clear_mask for all color buffers and stencil_clear_mask.
* ``pipe_caps.prefer_persp``: Whether the driver prefers perspective correct
or linear interpolation. This is a performance hint.
* ``pipe_caps.blit_3d``: Whether ``pipe_context::blit`` can be given a box with depth > 1, i.e. copy several 3D slices or array layers in a single call.
.. _pipe_shader_caps:

View file

@ -1198,6 +1198,7 @@ void si_init_screen_caps(struct si_screen *sscreen)
caps->has_const_bw = true;
caps->cl_gl_sharing = true;
caps->call_finalize_nir_in_linker = true;
caps->blit_3d = true;
/* Fixup dmabuf caps for the virtio + vpipe case (when fd=-1, u_init_pipe_screen_caps
* fails to set this capability). */

View file

@ -1062,6 +1062,7 @@ struct pipe_caps {
bool mesh_shader;
bool representative_fragment_test;
bool prefer_persp;
bool blit_3d;
int accelerated;
int min_texel_offset;

View file

@ -32,6 +32,7 @@
#include "textureview.h"
#include "glformats.h"
#include "api_exec_decl.h"
#include "pipe/p_screen.h"
#include "state_tracker/st_cb_copyimage.h"
@ -550,35 +551,45 @@ copy_image_subdata(struct gl_context *ctx,
int dstX, int dstY, int dstZ, int dstLevel,
int srcWidth, int srcHeight, int srcDepth)
{
/* loop over 2D slices/faces/layers */
for (int i = 0; i < srcDepth; ++i) {
int newSrcZ = srcZ + i;
int newDstZ = dstZ + i;
bool src_is_cubemap = srcTexImage &&
srcTexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP;
bool dst_is_cubemap = dstTexImage &&
dstTexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP;
if (srcTexImage &&
srcTexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP) {
/* need to update srcTexImage pointer for the cube face */
assert(srcZ + i < MAX_FACES);
srcTexImage = srcTexImage->TexObject->Image[srcZ + i][srcLevel];
assert(srcTexImage);
newSrcZ = 0;
}
if (src_is_cubemap || dst_is_cubemap || !ctx->screen->caps.blit_3d) {
/* loop over cubemap faces/layers */
for (int i = 0; i < srcDepth; ++i) {
int newSrcZ = srcZ + i;
int newDstZ = dstZ + i;
if (dstTexImage &&
dstTexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP) {
/* need to update dstTexImage pointer for the cube face */
assert(dstZ + i < MAX_FACES);
dstTexImage = dstTexImage->TexObject->Image[dstZ + i][dstLevel];
assert(dstTexImage);
newDstZ = 0;
}
if (src_is_cubemap) {
/* need to update srcTexImage pointer for the cube face */
assert(srcZ + i < MAX_FACES);
srcTexImage = srcTexImage->TexObject->Image[srcZ + i][srcLevel];
assert(srcTexImage);
newSrcZ = 0;
}
st_CopyImageSubData(ctx,
srcTexImage, srcRenderbuffer,
srcX, srcY, newSrcZ,
dstTexImage, dstRenderbuffer,
dstX, dstY, newDstZ,
srcWidth, srcHeight);
if (dst_is_cubemap) {
/* need to update dstTexImage pointer for the cube face */
assert(dstZ + i < MAX_FACES);
dstTexImage = dstTexImage->TexObject->Image[dstZ + i][dstLevel];
assert(dstTexImage);
newDstZ = 0;
}
st_CopyImageSubData(ctx,
srcTexImage, srcRenderbuffer,
srcX, srcY, newSrcZ,
dstTexImage, dstRenderbuffer,
dstX, dstY, newDstZ,
srcWidth, srcHeight, 1);
}
} else {
st_CopyImageSubData(ctx,
srcTexImage, srcRenderbuffer, srcX, srcY, srcZ,
dstTexImage, dstRenderbuffer, dstX, dstY, dstZ,
srcWidth, srcHeight, srcDepth);
}
}

View file

@ -676,7 +676,7 @@ st_CopyImageSubData(struct gl_context *ctx,
struct gl_texture_image *dst_image,
struct gl_renderbuffer *dst_renderbuffer,
int dst_x, int dst_y, int dst_z,
int src_width, int src_height)
int src_width, int src_height, int src_depth)
{
struct st_context *st = st_context(ctx);
struct pipe_context *pipe = st->pipe;
@ -718,13 +718,15 @@ st_CopyImageSubData(struct gl_context *ctx,
dst_level = 0;
}
u_box_2d_zslice(src_x, src_y, src_z, src_width, src_height, &box);
u_box_3d(src_x, src_y, src_z, src_width, src_height, src_depth, &box);
if ((src_image && st_compressed_format_fallback(st, src_image->TexFormat)) ||
(dst_image && st_compressed_format_fallback(st, dst_image->TexFormat))) {
fallback_copy_image(st, dst_image, dst_res, dst_x, dst_y, orig_dst_z,
src_image, src_res, src_x, src_y, orig_src_z,
src_width, src_height);
for (int i = 0; i < src_depth; i++) {
fallback_copy_image(st, dst_image, dst_res, dst_x, dst_y, orig_dst_z + i,
src_image, src_res, src_x, src_y, orig_src_z + i,
src_width, src_height);
}
} else {
copy_image(pipe, dst_res, dst_level, dst_x, dst_y, dst_z,
src_res, src_level, &box);

View file

@ -33,6 +33,6 @@ st_CopyImageSubData(struct gl_context *ctx,
struct gl_texture_image *dst_image,
struct gl_renderbuffer *dst_renderbuffer,
int dst_x, int dst_y, int dst_z,
int src_width, int src_height);
int src_width, int src_height, int src_depth);
#endif /* ST_CB_COPY_IMAGE_H */