st/mesa: optimize glCopyImageSubData for 3D and array textures
radeonsi is significantly faster (about 2-10x) if we use a single 3D copy instead of copying layers separately.

Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40071>
This commit is contained in:
parent
03d2cc2b2a
commit
857f5a8e9c
6 changed files with 48 additions and 32 deletions
|
|
@ -677,6 +677,7 @@ Capability about the features and limits of the driver/GPU.
|
|||
* ``pipe_caps.clear_masked``: Whether ``clear`` can accept a color_clear_mask for all color buffers and stencil_clear_mask.
|
||||
* ``pipe_caps.prefer_persp``: Whether the driver prefers perspective correct
|
||||
or linear interpolation. This is a performance hint.
|
||||
* ``pipe_caps.blit_3d``: Whether ``pipe_context::blit`` supports blit boxes with depth > 1, i.e. copying multiple 3D slices or array layers in a single call.
|
||||
|
||||
|
||||
.. _pipe_shader_caps:
|
||||
|
|
|
|||
|
|
@ -1198,6 +1198,7 @@ void si_init_screen_caps(struct si_screen *sscreen)
|
|||
caps->has_const_bw = true;
|
||||
caps->cl_gl_sharing = true;
|
||||
caps->call_finalize_nir_in_linker = true;
|
||||
caps->blit_3d = true;
|
||||
|
||||
/* Fixup dmabuf caps for the virtio + vpipe case (when fd=-1, u_init_pipe_screen_caps
|
||||
* fails to set this capability). */
|
||||
|
|
|
|||
|
|
@ -1062,6 +1062,7 @@ struct pipe_caps {
|
|||
bool mesh_shader;
|
||||
bool representative_fragment_test;
|
||||
bool prefer_persp;
|
||||
bool blit_3d;
|
||||
|
||||
int accelerated;
|
||||
int min_texel_offset;
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@
|
|||
#include "textureview.h"
|
||||
#include "glformats.h"
|
||||
#include "api_exec_decl.h"
|
||||
#include "pipe/p_screen.h"
|
||||
|
||||
#include "state_tracker/st_cb_copyimage.h"
|
||||
|
||||
|
|
@ -550,35 +551,45 @@ copy_image_subdata(struct gl_context *ctx,
|
|||
int dstX, int dstY, int dstZ, int dstLevel,
|
||||
int srcWidth, int srcHeight, int srcDepth)
|
||||
{
|
||||
/* loop over 2D slices/faces/layers */
|
||||
for (int i = 0; i < srcDepth; ++i) {
|
||||
int newSrcZ = srcZ + i;
|
||||
int newDstZ = dstZ + i;
|
||||
bool src_is_cubemap = srcTexImage &&
|
||||
srcTexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP;
|
||||
bool dst_is_cubemap = dstTexImage &&
|
||||
dstTexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP;
|
||||
|
||||
if (srcTexImage &&
|
||||
srcTexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP) {
|
||||
/* need to update srcTexImage pointer for the cube face */
|
||||
assert(srcZ + i < MAX_FACES);
|
||||
srcTexImage = srcTexImage->TexObject->Image[srcZ + i][srcLevel];
|
||||
assert(srcTexImage);
|
||||
newSrcZ = 0;
|
||||
}
|
||||
if (src_is_cubemap || dst_is_cubemap || !ctx->screen->caps.blit_3d) {
|
||||
/* loop over cubemap faces/layers */
|
||||
for (int i = 0; i < srcDepth; ++i) {
|
||||
int newSrcZ = srcZ + i;
|
||||
int newDstZ = dstZ + i;
|
||||
|
||||
if (dstTexImage &&
|
||||
dstTexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP) {
|
||||
/* need to update dstTexImage pointer for the cube face */
|
||||
assert(dstZ + i < MAX_FACES);
|
||||
dstTexImage = dstTexImage->TexObject->Image[dstZ + i][dstLevel];
|
||||
assert(dstTexImage);
|
||||
newDstZ = 0;
|
||||
}
|
||||
if (src_is_cubemap) {
|
||||
/* need to update srcTexImage pointer for the cube face */
|
||||
assert(srcZ + i < MAX_FACES);
|
||||
srcTexImage = srcTexImage->TexObject->Image[srcZ + i][srcLevel];
|
||||
assert(srcTexImage);
|
||||
newSrcZ = 0;
|
||||
}
|
||||
|
||||
st_CopyImageSubData(ctx,
|
||||
srcTexImage, srcRenderbuffer,
|
||||
srcX, srcY, newSrcZ,
|
||||
dstTexImage, dstRenderbuffer,
|
||||
dstX, dstY, newDstZ,
|
||||
srcWidth, srcHeight);
|
||||
if (dst_is_cubemap) {
|
||||
/* need to update dstTexImage pointer for the cube face */
|
||||
assert(dstZ + i < MAX_FACES);
|
||||
dstTexImage = dstTexImage->TexObject->Image[dstZ + i][dstLevel];
|
||||
assert(dstTexImage);
|
||||
newDstZ = 0;
|
||||
}
|
||||
|
||||
st_CopyImageSubData(ctx,
|
||||
srcTexImage, srcRenderbuffer,
|
||||
srcX, srcY, newSrcZ,
|
||||
dstTexImage, dstRenderbuffer,
|
||||
dstX, dstY, newDstZ,
|
||||
srcWidth, srcHeight, 1);
|
||||
}
|
||||
} else {
|
||||
st_CopyImageSubData(ctx,
|
||||
srcTexImage, srcRenderbuffer, srcX, srcY, srcZ,
|
||||
dstTexImage, dstRenderbuffer, dstX, dstY, dstZ,
|
||||
srcWidth, srcHeight, srcDepth);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -676,7 +676,7 @@ st_CopyImageSubData(struct gl_context *ctx,
|
|||
struct gl_texture_image *dst_image,
|
||||
struct gl_renderbuffer *dst_renderbuffer,
|
||||
int dst_x, int dst_y, int dst_z,
|
||||
int src_width, int src_height)
|
||||
int src_width, int src_height, int src_depth)
|
||||
{
|
||||
struct st_context *st = st_context(ctx);
|
||||
struct pipe_context *pipe = st->pipe;
|
||||
|
|
@ -718,13 +718,15 @@ st_CopyImageSubData(struct gl_context *ctx,
|
|||
dst_level = 0;
|
||||
}
|
||||
|
||||
u_box_2d_zslice(src_x, src_y, src_z, src_width, src_height, &box);
|
||||
u_box_3d(src_x, src_y, src_z, src_width, src_height, src_depth, &box);
|
||||
|
||||
if ((src_image && st_compressed_format_fallback(st, src_image->TexFormat)) ||
|
||||
(dst_image && st_compressed_format_fallback(st, dst_image->TexFormat))) {
|
||||
fallback_copy_image(st, dst_image, dst_res, dst_x, dst_y, orig_dst_z,
|
||||
src_image, src_res, src_x, src_y, orig_src_z,
|
||||
src_width, src_height);
|
||||
for (int i = 0; i < src_depth; i++) {
|
||||
fallback_copy_image(st, dst_image, dst_res, dst_x, dst_y, orig_dst_z + i,
|
||||
src_image, src_res, src_x, src_y, orig_src_z + i,
|
||||
src_width, src_height);
|
||||
}
|
||||
} else {
|
||||
copy_image(pipe, dst_res, dst_level, dst_x, dst_y, dst_z,
|
||||
src_res, src_level, &box);
|
||||
|
|
|
|||
|
|
@ -33,6 +33,6 @@ st_CopyImageSubData(struct gl_context *ctx,
|
|||
struct gl_texture_image *dst_image,
|
||||
struct gl_renderbuffer *dst_renderbuffer,
|
||||
int dst_x, int dst_y, int dst_z,
|
||||
int src_width, int src_height);
|
||||
int src_width, int src_height, int src_depth);
|
||||
|
||||
#endif /* ST_CB_COPY_IMAGE_H */
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue