panvk/csf: implement VK_EXT_primitives_generated_query except primitive restart
Primitive restart requires scanning the index buffer to determine how many primitives are present, and will be handled in a later commit. Signed-off-by: Olivia Lee <olivia.lee@collabora.com> Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com> Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38547>
This commit is contained in:
parent
8f5d9f6fd7
commit
1bdd640d83
6 changed files with 319 additions and 123 deletions
|
|
@ -5,10 +5,39 @@
|
|||
|
||||
#include "compiler/libcl/libcl.h"
|
||||
#include "compiler/libcl/libcl_vk.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
#include "genxml/gen_macros.h"
|
||||
#include "lib/pan_encoder.h"
|
||||
#include "poly/cl/restart.h"
|
||||
#include "draw_helper.h"
|
||||
|
||||
#if PAN_ARCH >= 10
|
||||
/* Precomp kernel accumulating the primitives generated by an indirect draw
 * into a VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT report.
 *
 * prims_generated:   device pointer to the query report counter
 * draw_count_buffer: optional device pointer to the actual draw count
 *                    (vkCmdDrawIndirectCount); NULL for plain indirect draws
 * max_draw_count:    CPU-provided draw count / upper bound on *draw_count_buffer
 * cmd_stride:        byte stride between successive indirect commands
 * cmd:               device pointer to the first indirect command
 * view_count:        number of enabled multiview views (1 when multiview is off)
 * compact_prim__11:  compacted mesa_prim topology (see poly_uncompact_prim)
 */
KERNEL(1)
panlib_update_prims_generated_query_indirect(
   global uint32_t *prims_generated, global uint32_t *draw_count_buffer,
   uint32_t max_draw_count, uint32_t cmd_stride, constant uint32_t *cmd,
   uint32_t view_count, uint32_t compact_prim__11)
{
   enum mesa_prim prim = poly_uncompact_prim(compact_prim__11);

   /* With draw-count-indirect, the effective draw count is read from the GPU
    * buffer but clamped to the CPU-side maximum, per the Vulkan spec. */
   uint32_t draw_count = draw_count_buffer ?
      min(*draw_count_buffer, max_draw_count) : max_draw_count;

   for (uint32_t draw_id = 0; draw_id < draw_count; draw_id++) {
      /* cmd may be either VkDrawIndirectCommand or
       * VkDrawIndexedIndirectCommand. In both cases the vertex/index count is
       * the first field, and the instance count is the second */
      uint32_t vertex_count = cmd[0];
      uint32_t instance_count = cmd[1];

      uint32_t prims_per_instance =
         u_decomposed_prims_for_vertices(prim, vertex_count);

      /* Scale by view_count so multiview draws match the direct-draw path,
       * which multiplies by view_count on the CPU. Previously the view_count
       * parameter was accepted but never used, under-counting multiview
       * indirect draws. */
      *prims_generated += prims_per_instance * instance_count * view_count;

      cmd = (constant uint32_t *) ((uintptr_t) cmd + cmd_stride);
   }
}
|
||||
#endif
|
||||
|
||||
#if (PAN_ARCH == 6 || PAN_ARCH == 7)
|
||||
struct panlib_draw_info {
|
||||
struct {
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@
|
|||
#include "panvk_cmd_draw.h"
|
||||
#include "panvk_cmd_fb_preload.h"
|
||||
#include "panvk_cmd_meta.h"
|
||||
#include "panvk_cmd_precomp.h"
|
||||
#include "panvk_cmd_ts.h"
|
||||
#include "panvk_device.h"
|
||||
#include "panvk_entrypoints.h"
|
||||
|
|
@ -48,6 +49,7 @@
|
|||
#include "vk_meta.h"
|
||||
#include "vk_pipeline_layout.h"
|
||||
#include "vk_render_pass.h"
|
||||
#include "poly/geometry.h"
|
||||
|
||||
static enum cs_reg_perm
|
||||
provoking_vertex_fn_reg_perm_cb(struct cs_builder *b, unsigned reg)
|
||||
|
|
@ -2358,6 +2360,60 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
/* Accumulate the primitives generated by @draw into the currently active
 * VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT query, if any.
 *
 * Direct draws: the primitive count is computed at record time and folded
 * into the report with a load/add/store sequence emitted on the compute
 * subqueue. Indirect draws: a precomp kernel is dispatched to read the draw
 * commands on the GPU.
 */
static void
update_prims_generated_query(struct panvk_cmd_buffer *cmdbuf,
                             struct panvk_draw_info *draw)
{
   struct cs_builder *b =
      panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_COMPUTE);
   struct vk_input_assembly_state *ia = &cmdbuf->vk.dynamic_graphics_state.ia;
   struct panvk_prims_generated_query_state *state =
      &cmdbuf->state.gfx.prims_generated_query;

   /* state->ptr is only non-zero between Begin/EndQuery for a
    * primitives-generated query. */
   if (!state->ptr)
      return;

   /* TODO: primitive restart (requires scanning the index buffer to know how
    * many primitives are actually produced). */
   assert(!draw->index.size || !ia->primitive_restart_enable);

   enum mesa_prim prim = vk_topology_to_mesa(ia->primitive_topology);
   /* Each enabled view replays the draw, so the count scales with the number
    * of bits in view_mask; no multiview means a single view. */
   uint32_t view_count = cmdbuf->state.gfx.render.view_mask ?
      util_bitcount(cmdbuf->state.gfx.render.view_mask) : 1;

   if (draw->indirect.buffer_dev_addr) {
      struct panvk_precomp_ctx precomp_ctx = panvk_per_arch(precomp_cs)(cmdbuf);

      struct panlib_update_prims_generated_query_indirect_args args = {
         .prims_generated = state->ptr,
         .draw_count_buffer = draw->indirect.count_buffer_dev_addr,
         .max_draw_count = draw->indirect.draw_count,
         .cmd_stride = draw->indirect.stride,
         .cmd = draw->indirect.buffer_dev_addr,
         .view_count = view_count,
      };

      /* We need to WAIT in order to avoid overlapping the (non-atomic) direct
       * draw counter updates with indirect draws. TODO: we could avoid that
       * by having separate direct/indirect counters and adding them on read */
      panlib_update_prims_generated_query_indirect_struct(
         &precomp_ctx, panlib_1d(1), PANLIB_BARRIER_CSF_WAIT, args,
         poly_compact_prim(prim));
   } else {
      uint32_t prims_per_instance =
         u_decomposed_prims_for_vertices(prim, draw->vertex.count);
      uint32_t prims_generated =
         prims_per_instance * draw->instance.count * view_count;

      struct cs_index addr = cs_scratch_reg64(b, 0);
      struct cs_index value = cs_scratch_reg32(b, 2);

      /* Read-modify-write of the report counter on the compute subqueue.
       * NOTE(review): there is no explicit scoreboard wait between the load
       * and the add — presumably the CS builder orders the dependency;
       * confirm against other cs_load*_to users. */
      cs_move64_to(b, addr, state->ptr);
      cs_load32_to(b, value, addr, 0);
      cs_add32(b, value, value, prims_generated);
      cs_store32(b, value, addr, 0);
   }
}
|
||||
|
||||
static void
|
||||
panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
|
||||
{
|
||||
|
|
@ -2382,6 +2438,8 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
|
|||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
|
||||
update_prims_generated_query(cmdbuf, draw);
|
||||
|
||||
cs_update_vt_ctx(b) {
|
||||
cs_move32_to(b, cs_sr_reg32(b, IDVS, GLOBAL_ATTRIBUTE_OFFSET), 0);
|
||||
cs_move32_to(b, cs_sr_reg32(b, IDVS, INDEX_COUNT), draw->vertex.count);
|
||||
|
|
@ -2562,6 +2620,8 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
|
|||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
|
||||
update_prims_generated_query(cmdbuf, draw);
|
||||
|
||||
struct panvk_shader_desc_state *vs_desc_state =
|
||||
&cmdbuf->state.gfx.vs.desc;
|
||||
const struct vk_dynamic_graphics_state *dyns =
|
||||
|
|
|
|||
|
|
@ -26,6 +26,21 @@
|
|||
#include "panvk_query_pool.h"
|
||||
#include "panvk_queue.h"
|
||||
|
||||
static enum panvk_subqueue_id
|
||||
panvk_subqueue_for_query_type(VkQueryType type)
|
||||
{
|
||||
/* timestamp queries are not handled here , because they may be written
|
||||
* from any subqueue */
|
||||
switch (type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
return PANVK_SUBQUEUE_FRAGMENT;
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||
return PANVK_SUBQUEUE_COMPUTE;
|
||||
default:
|
||||
UNREACHABLE("Unsupported query type");
|
||||
}
|
||||
}
|
||||
|
||||
/* At the API level, a query consists of a status and a result. Both are
|
||||
* uninitialized initially. There are these query operations:
|
||||
*
|
||||
|
|
@ -52,6 +67,7 @@
|
|||
* 0 and does not need to wait.
|
||||
*/
|
||||
|
||||
/* Default path for query types that don't need special logic */
|
||||
static void
|
||||
reset_queries_batch(struct cs_builder *b, struct cs_index addr,
|
||||
struct cs_index zero_regs, uint32_t query_count)
|
||||
|
|
@ -92,11 +108,13 @@ reset_queries_batch(struct cs_builder *b, struct cs_index addr,
|
|||
}
|
||||
|
||||
static void
|
||||
panvk_cmd_reset_occlusion_queries(struct panvk_cmd_buffer *cmd,
|
||||
struct panvk_query_pool *pool,
|
||||
uint32_t first_query, uint32_t query_count)
|
||||
panvk_cmd_reset_queries(struct panvk_cmd_buffer *cmd,
|
||||
struct panvk_query_pool *pool, uint32_t first_query,
|
||||
uint32_t query_count)
|
||||
{
|
||||
struct cs_builder *b = panvk_get_cs_builder(cmd, PANVK_SUBQUEUE_FRAGMENT);
|
||||
enum panvk_subqueue_id subqueue =
|
||||
panvk_subqueue_for_query_type(pool->vk.query_type);
|
||||
struct cs_builder *b = panvk_get_cs_builder(cmd, subqueue);
|
||||
|
||||
/* Wait on deferred sync to ensure all prior query operations have
|
||||
* completed
|
||||
|
|
@ -132,6 +150,118 @@ panvk_cmd_reset_occlusion_queries(struct panvk_cmd_buffer *cmd,
|
|||
cs_wait_slot(b, SB_ID(IMM_FLUSH));
|
||||
}
|
||||
|
||||
static void
|
||||
copy_result_batch(struct cs_builder *b,
|
||||
VkQueryResultFlags flags,
|
||||
struct cs_index dst_addr,
|
||||
VkDeviceSize dst_stride,
|
||||
struct cs_index res_addr,
|
||||
struct cs_index avail_addr,
|
||||
struct cs_index scratch_regs,
|
||||
uint32_t query_count)
|
||||
{
|
||||
uint32_t res_size = (flags & VK_QUERY_RESULT_64_BIT) ? 2 : 1;
|
||||
uint32_t regs_per_copy =
|
||||
res_size + ((flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) ? 1 : 0);
|
||||
|
||||
assert(query_count <= scratch_regs.size / regs_per_copy);
|
||||
|
||||
for (uint32_t i = 0; i < query_count; i++) {
|
||||
struct cs_index res =
|
||||
cs_reg_tuple(b, scratch_regs.reg + (i * regs_per_copy), res_size);
|
||||
struct cs_index avail = cs_reg32(b, res.reg + res_size);
|
||||
|
||||
cs_load_to(b, res, res_addr, BITFIELD_MASK(res.size),
|
||||
i * sizeof(uint64_t));
|
||||
|
||||
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
|
||||
cs_load32_to(b, avail, avail_addr, i * sizeof(struct panvk_cs_sync32));
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < query_count; i++) {
|
||||
struct cs_index store_src =
|
||||
cs_reg_tuple(b, scratch_regs.reg + (i * regs_per_copy), regs_per_copy);
|
||||
|
||||
cs_store(b, store_src, dst_addr, BITFIELD_MASK(regs_per_copy),
|
||||
i * dst_stride);
|
||||
}
|
||||
|
||||
/* Flush the stores. */
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
|
||||
static void
|
||||
panvk_copy_query_results(struct panvk_cmd_buffer *cmd,
|
||||
struct panvk_query_pool *pool,
|
||||
uint32_t first_query, uint32_t query_count,
|
||||
uint64_t dst_buffer_addr,
|
||||
VkDeviceSize stride,
|
||||
VkQueryResultFlags flags)
|
||||
{
|
||||
enum panvk_subqueue_id subqueue =
|
||||
panvk_subqueue_for_query_type(pool->vk.query_type);
|
||||
struct cs_builder *b = panvk_get_cs_builder(cmd, subqueue);
|
||||
|
||||
/* Wait for query syncobjs to be signalled. */
|
||||
if (flags & VK_QUERY_RESULT_WAIT_BIT)
|
||||
cs_wait_slot(b, SB_ID(DEFERRED_SYNC));
|
||||
|
||||
uint32_t res_size = (flags & VK_QUERY_RESULT_64_BIT) ? 2 : 1;
|
||||
uint32_t regs_per_copy =
|
||||
res_size + ((flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) ? 1 : 0);
|
||||
|
||||
struct cs_index dst_addr = cs_scratch_reg64(b, 16);
|
||||
struct cs_index res_addr = cs_scratch_reg64(b, 14);
|
||||
struct cs_index avail_addr = cs_scratch_reg64(b, 12);
|
||||
struct cs_index counter = cs_scratch_reg32(b, 11);
|
||||
struct cs_index scratch_regs = cs_scratch_reg_tuple(b, 0, 11);
|
||||
uint32_t queries_per_batch = scratch_regs.size / regs_per_copy;
|
||||
|
||||
if (stride > 0) {
|
||||
/* Store offset is a 16-bit signed integer, so we might be limited by the
|
||||
* stride here. */
|
||||
queries_per_batch = MIN2(((1u << 15) / stride) + 1, queries_per_batch);
|
||||
}
|
||||
|
||||
/* Stop unrolling the loop when it takes more than 2 steps to copy the
|
||||
* queries. */
|
||||
if (query_count > 2 * queries_per_batch) {
|
||||
uint32_t copied_query_count =
|
||||
query_count - (query_count % queries_per_batch);
|
||||
|
||||
cs_move32_to(b, counter, copied_query_count);
|
||||
cs_move64_to(b, dst_addr, dst_buffer_addr);
|
||||
cs_move64_to(b, res_addr, panvk_query_report_dev_addr(pool, first_query));
|
||||
cs_move64_to(b, avail_addr,
|
||||
panvk_query_available_dev_addr(pool, first_query));
|
||||
cs_while(b, MALI_CS_CONDITION_GREATER, counter) {
|
||||
copy_result_batch(b, flags, dst_addr, stride, res_addr, avail_addr,
|
||||
scratch_regs, queries_per_batch);
|
||||
|
||||
cs_add32(b, counter, counter, -queries_per_batch);
|
||||
cs_add64(b, dst_addr, dst_addr, queries_per_batch * stride);
|
||||
cs_add64(b, res_addr, res_addr, queries_per_batch * sizeof(uint64_t));
|
||||
cs_add64(b, avail_addr, avail_addr,
|
||||
queries_per_batch * sizeof(uint64_t));
|
||||
}
|
||||
|
||||
dst_buffer_addr += stride * copied_query_count;
|
||||
first_query += copied_query_count;
|
||||
query_count -= copied_query_count;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < query_count; i += queries_per_batch) {
|
||||
cs_move64_to(b, dst_addr, dst_buffer_addr + (i * stride));
|
||||
cs_move64_to(b, res_addr,
|
||||
panvk_query_report_dev_addr(pool, i + first_query));
|
||||
cs_move64_to(b, avail_addr,
|
||||
panvk_query_available_dev_addr(pool, i + first_query));
|
||||
copy_result_batch(b, flags, dst_addr, stride, res_addr, avail_addr,
|
||||
scratch_regs,
|
||||
MIN2(queries_per_batch, query_count - i));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
panvk_cmd_begin_occlusion_query(struct panvk_cmd_buffer *cmd,
|
||||
struct panvk_query_pool *pool, uint32_t query,
|
||||
|
|
@ -206,116 +336,6 @@ panvk_cmd_end_occlusion_query(struct panvk_cmd_buffer *cmd,
|
|||
cs_defer(SB_MASK(DEFERRED_FLUSH), SB_ID(DEFERRED_SYNC)));
|
||||
}
|
||||
|
||||
/* Emit CS instructions copying one batch of occlusion-query results to the
 * destination buffer. All loads are issued first (results plus optional
 * availability words into consecutive scratch registers), then all stores,
 * then the stores are flushed. */
static void
copy_oq_result_batch(struct cs_builder *b,
                     VkQueryResultFlags flags,
                     struct cs_index dst_addr,
                     VkDeviceSize dst_stride,
                     struct cs_index res_addr,
                     struct cs_index avail_addr,
                     struct cs_index scratch_regs,
                     uint32_t query_count)
{
   /* A 64-bit result occupies two 32-bit registers; availability adds one. */
   uint32_t res_size = (flags & VK_QUERY_RESULT_64_BIT) ? 2 : 1;
   uint32_t regs_per_copy =
      res_size + ((flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) ? 1 : 0);

   /* The batch must fit in the scratch register window. */
   assert(query_count <= scratch_regs.size / regs_per_copy);

   for (uint32_t i = 0; i < query_count; i++) {
      struct cs_index res =
         cs_reg_tuple(b, scratch_regs.reg + (i * regs_per_copy), res_size);
      /* Availability word goes right after the result registers. */
      struct cs_index avail = cs_reg32(b, res.reg + res_size);

      cs_load_to(b, res, res_addr, BITFIELD_MASK(res.size),
                 i * sizeof(uint64_t));

      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
         cs_load32_to(b, avail, avail_addr, i * sizeof(struct panvk_cs_sync32));
   }

   for (uint32_t i = 0; i < query_count; i++) {
      /* Store the result (+availability) group with the API-provided stride. */
      struct cs_index store_src =
         cs_reg_tuple(b, scratch_regs.reg + (i * regs_per_copy), regs_per_copy);

      cs_store(b, store_src, dst_addr, BITFIELD_MASK(regs_per_copy),
               i * dst_stride);
   }

   /* Flush the stores. */
   cs_flush_stores(b);
}
|
||||
|
||||
/* Emit CS commands copying @query_count occlusion-query results starting at
 * @first_query into @dst_buffer_addr with the given @stride
 * (vkCmdCopyQueryPoolResults). Results are copied in scratch-register-sized
 * batches; a CS loop is emitted when unrolling would take more than two
 * batches. */
static void
panvk_copy_occlusion_query_results(struct panvk_cmd_buffer *cmd,
                                   struct panvk_query_pool *pool,
                                   uint32_t first_query, uint32_t query_count,
                                   uint64_t dst_buffer_addr,
                                   VkDeviceSize stride,
                                   VkQueryResultFlags flags)
{
   struct cs_builder *b = panvk_get_cs_builder(cmd, PANVK_SUBQUEUE_FRAGMENT);

   /* Wait for occlusion query syncobjs to be signalled. */
   if (flags & VK_QUERY_RESULT_WAIT_BIT)
      cs_wait_slot(b, SB_ID(DEFERRED_SYNC));

   /* A 64-bit result occupies two 32-bit registers; availability adds one. */
   uint32_t res_size = (flags & VK_QUERY_RESULT_64_BIT) ? 2 : 1;
   uint32_t regs_per_copy =
      res_size + ((flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) ? 1 : 0);

   struct cs_index dst_addr = cs_scratch_reg64(b, 16);
   struct cs_index res_addr = cs_scratch_reg64(b, 14);
   struct cs_index avail_addr = cs_scratch_reg64(b, 12);
   struct cs_index counter = cs_scratch_reg32(b, 11);
   struct cs_index scratch_regs = cs_scratch_reg_tuple(b, 0, 11);
   uint32_t queries_per_batch = scratch_regs.size / regs_per_copy;

   if (stride > 0) {
      /* Store offset is a 16-bit signed integer, so we might be limited by the
       * stride here.
       * NOTE(review): this bound allows (queries_per_batch - 1) * stride to be
       * exactly 1 << 15 when stride divides 32768 evenly (e.g. stride 4096),
       * which would overflow a signed 16-bit offset — confirm and consider
       * using (1u << 15) - 1. */
      queries_per_batch = MIN2(((1u << 15) / stride) + 1, queries_per_batch);
   }

   /* Stop unrolling the loop when it takes more than 2 steps to copy the
    * queries. */
   if (query_count > 2 * queries_per_batch) {
      /* Copy whole batches in a CS loop; the remainder is unrolled below. */
      uint32_t copied_query_count =
         query_count - (query_count % queries_per_batch);

      cs_move32_to(b, counter, copied_query_count);
      cs_move64_to(b, dst_addr, dst_buffer_addr);
      cs_move64_to(b, res_addr, panvk_query_report_dev_addr(pool, first_query));
      cs_move64_to(b, avail_addr,
                   panvk_query_available_dev_addr(pool, first_query));
      cs_while(b, MALI_CS_CONDITION_GREATER, counter) {
         copy_oq_result_batch(b, flags, dst_addr, stride, res_addr, avail_addr,
                              scratch_regs, queries_per_batch);

         /* Advance the counter and the three pointers to the next batch. */
         cs_add32(b, counter, counter, -queries_per_batch);
         cs_add64(b, dst_addr, dst_addr, queries_per_batch * stride);
         cs_add64(b, res_addr, res_addr, queries_per_batch * sizeof(uint64_t));
         cs_add64(b, avail_addr, avail_addr,
                  queries_per_batch * sizeof(uint64_t));
      }

      dst_buffer_addr += stride * copied_query_count;
      first_query += copied_query_count;
      query_count -= copied_query_count;
   }

   /* Unrolled copy of the remaining (at most 2 batches worth of) queries. */
   for (uint32_t i = 0; i < query_count; i += queries_per_batch) {
      cs_move64_to(b, dst_addr, dst_buffer_addr + (i * stride));
      cs_move64_to(b, res_addr,
                   panvk_query_report_dev_addr(pool, i + first_query));
      cs_move64_to(b, avail_addr,
                   panvk_query_available_dev_addr(pool, i + first_query));
      copy_oq_result_batch(b, flags, dst_addr, stride, res_addr, avail_addr,
                           scratch_regs,
                           MIN2(queries_per_batch, query_count - i));
   }
}
|
||||
|
||||
static void
|
||||
panvk_cmd_reset_timestamp_queries(struct panvk_cmd_buffer *cmd,
|
||||
struct panvk_query_pool *pool,
|
||||
|
|
@ -638,6 +658,64 @@ panvk_copy_timestamp_query_results(struct panvk_cmd_buffer *cmd,
|
|||
PANLIB_BARRIER_CSF_SYNC, push);
|
||||
}
|
||||
|
||||
static void
|
||||
panvk_cmd_begin_prims_generated_query(
|
||||
struct panvk_cmd_buffer *cmd, struct panvk_query_pool *pool, uint32_t query,
|
||||
VkQueryControlFlags flags)
|
||||
{
|
||||
uint64_t report_addr = panvk_query_report_dev_addr(pool, query);
|
||||
|
||||
cmd->state.gfx.prims_generated_query.ptr = report_addr;
|
||||
cmd->state.gfx.prims_generated_query.syncobj =
|
||||
panvk_query_available_dev_addr(pool, query);
|
||||
|
||||
/* From the Vulkan spec:
|
||||
*
|
||||
* "When a primitives generated query begins, the count of primitives
|
||||
* generated starts from zero."
|
||||
*
|
||||
*/
|
||||
struct cs_builder *b = panvk_get_cs_builder(cmd, PANVK_SUBQUEUE_COMPUTE);
|
||||
|
||||
struct cs_index report_addr_gpu = cs_scratch_reg64(b, 0);
|
||||
struct cs_index clear_value = cs_scratch_reg64(b, 2);
|
||||
cs_move64_to(b, report_addr_gpu, report_addr);
|
||||
cs_move64_to(b, clear_value, 0);
|
||||
cs_store64(b, clear_value, report_addr_gpu, 0);
|
||||
cs_flush_stores(b);
|
||||
}
|
||||
|
||||
/* End a VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT query: clear the per-command-
 * buffer query state so later draws stop accumulating, flush the report from
 * the caches, and signal the query's availability syncobj once the flush is
 * done. */
static void
panvk_cmd_end_prims_generated_query(
   struct panvk_cmd_buffer *cmd, struct panvk_query_pool *pool, uint32_t query)
{
   cmd->state.gfx.prims_generated_query.ptr = 0;
   cmd->state.gfx.prims_generated_query.syncobj = 0;

   struct cs_builder *b = panvk_get_cs_builder(cmd, PANVK_SUBQUEUE_COMPUTE);
   struct cs_index query_syncobj = cs_scratch_reg64(b, 0);
   struct cs_index val = cs_scratch_reg32(b, 2);

   /* The query accumulates primitive counts to the report which is on cached
    * memory. Wait for the accumulation and flush the caches.
    *
    * No need to wait on iter_sb because all shader invocations to update the
    * counter use PANLIB_BARRIER_CSF_WAIT already, and all direct draws update
    * the counter synchronously.
    */
   cs_move32_to(b, val, 0);
   cs_flush_caches(
      b, MALI_CS_FLUSH_MODE_CLEAN, MALI_CS_FLUSH_MODE_CLEAN,
      MALI_CS_OTHER_FLUSH_MODE_NONE, val,
      cs_defer(SB_IMM_MASK, SB_ID(DEFERRED_FLUSH)));

   /* Signal the query syncobj after the flush is effective. */
   cs_move32_to(b, val, 1);
   cs_move64_to(b, query_syncobj, panvk_query_available_dev_addr(pool, query));
   cs_sync32_set(b, true, MALI_CS_SYNC_SCOPE_CSG, val, query_syncobj,
                 cs_defer(SB_MASK(DEFERRED_FLUSH), SB_ID(DEFERRED_SYNC)));
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdResetQueryPool)(VkCommandBuffer commandBuffer,
|
||||
VkQueryPool queryPool, uint32_t firstQuery,
|
||||
|
|
@ -650,8 +728,9 @@ panvk_per_arch(CmdResetQueryPool)(VkCommandBuffer commandBuffer,
|
|||
return;
|
||||
|
||||
switch (pool->vk.query_type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION: {
|
||||
panvk_cmd_reset_occlusion_queries(cmd, pool, firstQuery, queryCount);
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
|
||||
panvk_cmd_reset_queries(cmd, pool, firstQuery, queryCount);
|
||||
break;
|
||||
}
|
||||
case VK_QUERY_TYPE_TIMESTAMP: {
|
||||
|
|
@ -680,6 +759,10 @@ panvk_per_arch(CmdBeginQueryIndexedEXT)(VkCommandBuffer commandBuffer,
|
|||
panvk_cmd_begin_occlusion_query(cmd, pool, query, flags);
|
||||
break;
|
||||
}
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
|
||||
panvk_cmd_begin_prims_generated_query(cmd, pool, query, flags);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE("Unsupported query type");
|
||||
}
|
||||
|
|
@ -701,6 +784,10 @@ panvk_per_arch(CmdEndQueryIndexedEXT)(VkCommandBuffer commandBuffer,
|
|||
panvk_cmd_end_occlusion_query(cmd, pool, query);
|
||||
break;
|
||||
}
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
|
||||
panvk_cmd_end_prims_generated_query(cmd, pool, query);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE("Unsupported query type");
|
||||
}
|
||||
|
|
@ -730,9 +817,10 @@ panvk_per_arch(CmdCopyQueryPoolResults)(
|
|||
uint64_t dst_buffer_addr = panvk_buffer_gpu_ptr(dst_buffer, dstOffset);
|
||||
|
||||
switch (pool->vk.query_type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION: {
|
||||
panvk_copy_occlusion_query_results(cmd, pool, firstQuery, queryCount,
|
||||
dst_buffer_addr, stride, flags);
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
|
||||
panvk_copy_query_results(cmd, pool, firstQuery, queryCount,
|
||||
dst_buffer_addr, stride, flags);
|
||||
break;
|
||||
}
|
||||
#if PAN_ARCH >= 10
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
#include "panvk_blend.h"
|
||||
#include "panvk_cmd_desc_state.h"
|
||||
#include "panvk_cmd_oq.h"
|
||||
#include "panvk_cmd_query.h"
|
||||
#include "panvk_entrypoints.h"
|
||||
#include "panvk_image.h"
|
||||
#include "panvk_image_view.h"
|
||||
|
|
@ -129,6 +129,9 @@ struct panvk_cmd_graphics_state {
|
|||
} dynamic;
|
||||
|
||||
struct panvk_occlusion_query_state occlusion_query;
|
||||
#if PAN_ARCH >= 10
|
||||
struct panvk_prims_generated_query_state prims_generated_query;
|
||||
#endif
|
||||
struct panvk_graphics_sysvals sysvals;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
|
|
|
|||
|
|
@ -3,8 +3,8 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef PANVK_CMD_OQ_H
|
||||
#define PANVK_CMD_OQ_H
|
||||
#ifndef PANVK_CMD_QUERY_H
|
||||
#define PANVK_CMD_QUERY_H
|
||||
|
||||
#ifndef PAN_ARCH
|
||||
#error "PAN_ARCH must be defined"
|
||||
|
|
@ -20,4 +20,11 @@ struct panvk_occlusion_query_state {
|
|||
enum mali_occlusion_mode mode;
|
||||
};
|
||||
|
||||
#endif
|
||||
#if PAN_ARCH >= 10
|
||||
struct panvk_prims_generated_query_state {
|
||||
uint64_t syncobj;
|
||||
uint64_t ptr;
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -66,6 +66,10 @@ panvk_per_arch(CreateQueryPool)(VkDevice _device,
|
|||
reports_per_query = PANVK_SUBQUEUE_COUNT + 1;
|
||||
break;
|
||||
}
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
|
||||
reports_per_query = 1;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
UNREACHABLE("Unsupported query type");
|
||||
|
|
@ -274,6 +278,11 @@ panvk_per_arch(GetQueryPoolResults)(VkDevice _device, VkQueryPool queryPool,
|
|||
pool->reports_per_query);
|
||||
break;
|
||||
}
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
|
||||
if (write_results)
|
||||
cpu_write_query_result(dst, 0, flags, src[0].value);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
UNREACHABLE("Unsupported query type");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue