ethosu: Compute is_partkernel during scheduling

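The weight encoder needs to know whether part-kernel-first traversal is used, so compute is_partkernel in find_block_config() during scheduling and store it in the block config, instead of computing it while lowering the convolution.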

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39611>
This commit is contained in:
Tomeu Vizoso 2026-02-17 09:17:58 +01:00 committed by Marge Bot
parent 3ade0a4dd6
commit 410d74e078
5 changed files with 14 additions and 28 deletions

View file

@ -206,7 +206,7 @@ emit_kernel(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation
stride |= ((operation->kernel.stride_y - 1) >> 1) << 9;
stride |= (operation->kernel.dilation_x - 1) << 3;
stride |= (operation->kernel.dilation_y - 1) << 4;
stride |= operation->conv.part_kernel_first << 2;
stride |= operation->block_config.is_partkernel << 2;
EMIT0(NPU_SET_KERNEL_STRIDE, stride);
}

View file

@ -110,7 +110,7 @@ fill_weights(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio
input_weights,
operation->block_config.ofm_block.depth,
operation->kernel.depthwise,
operation->conv.part_kernel_first,
operation->block_config.is_partkernel,
8 /* ifm_bitdepth */,
8 /* decomp_h */,
8 /* decomp_w */,

View file

@ -29,29 +29,6 @@ needed_total_padding(int input_size, int stride, int filter_size)
return MAX2(filter_size - (input_size % stride), 0);
}
/*
 * Decide whether the part-kernel-first block traversal should be used for
 * an operation, by comparing the estimated DPU utilization of the two
 * traversal strategies.
 *
 * Returns true only for non-depthwise convolutions where part-kernel-first
 * is estimated to be at least as efficient as depth-first, or where the IFM
 * depth is 8 or less.
 */
static bool
ethosu_is_part_kernel_first(struct ethosu_operation *operation)
{
   const unsigned ifm_depth = operation->ifm.shape.depth;
   const unsigned kernel_elems = operation->kernel.height * operation->kernel.width;

   /* Depth-first estimate: ratio of the IFM depth to that depth rounded up
    * to a multiple of 32. */
   const float depth_first_util =
      (float)ifm_depth / ethosu_round_up_to_multiple(ifm_depth, 32);

   /* Part-kernel-first estimate: depth rounded up to a multiple of 8,
    * scaled by the kernel element count rounded up to a multiple of 4. */
   float part_kernel_util =
      ((float)ifm_depth / ethosu_round_up_to_multiple(ifm_depth, 8));
   part_kernel_util *=
      (float)kernel_elems / ethosu_round_up_to_multiple(kernel_elems, 4);

   /* Anything other than a regular (non-depthwise) convolution never uses
    * part-kernel-first. */
   if (operation->type != ETHOSU_OPERATION_TYPE_CONVOLUTION ||
       operation->kernel.depthwise)
      return false;

   /* Part-kernel first is always better for ifm depths <= 8. */
   return ifm_depth <= 8 || part_kernel_util >= depth_first_util;
}
static void
set_feature_maps(struct pipe_tensor *input_tensor,
struct pipe_tensor *output_tensor,
@ -162,8 +139,6 @@ ethosu_lower_convolution(struct ethosu_subgraph *subgraph,
operation->kernel.zero_points = NULL;
}
operation->conv.part_kernel_first = ethosu_is_part_kernel_first(operation);
if (poperation->conv.padding_same) {
unsigned vert = needed_total_padding(input_tensor->dims[1], poperation->conv.stride_y, poperation->conv.weight_tensor->dims[1]);
unsigned horiz = needed_total_padding(input_tensor->dims[2], poperation->conv.stride_x, poperation->conv.weight_tensor->dims[2]);

View file

@ -136,7 +136,6 @@ struct ethosu_operation {
struct {
struct ethosu_address_range weights;
struct ethosu_address_range scales;
bool part_kernel_first;
bool depthwise;
} conv;

View file

@ -90,6 +90,17 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope
unsigned depth = MAX2(screen->ofm_ublock.depth, MIN2(search_space.depth, ARCH_SPLIT_DEPTH));
bool is_part_kernel = false;
if (is_convolution) {
unsigned kernel_size = operation->kernel.width * operation->kernel.height;
unsigned ifm_depth = operation->ifm.shape.depth;
float depth_utilization = (float)ifm_depth / (float)ethosu_round_up_to_multiple(ifm_depth, 32);
float part_kernel_utilization = (float)ifm_depth / (float)ethosu_round_up_to_multiple(ifm_depth, 8);
part_kernel_utilization *= (float)kernel_size / (float)ethosu_round_up_to_multiple(kernel_size, 4);
if (!operation->kernel.depthwise && (part_kernel_utilization >= depth_utilization || ifm_depth <= 8))
is_part_kernel = true;
}
if (depth < operation->ofm.shape.depth) {
depth = align(depth, ARCH_SPLIT_DEPTH);
}
@ -170,6 +181,7 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope
config.ofm_block.width = width;
config.ofm_block.depth = depth;
config.ofm_ublock = screen->ofm_ublock;
config.is_partkernel = is_part_kernel;
best_cost = relative_cost;
}