From 410d74e0789ddabca4714bb7c5f0f5e47b5c4892 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Tue, 17 Feb 2026 09:17:58 +0100 Subject: [PATCH] ethosu: Compute is_partkernel during scheduling Move the part-kernel-first decision from ethosu_lower_convolution() into find_block_config() and store it in the block config, as it is needed for encoding the weights. Part-of: --- src/gallium/drivers/ethosu/ethosu_cmd.c | 2 +- src/gallium/drivers/ethosu/ethosu_coefs.c | 2 +- src/gallium/drivers/ethosu/ethosu_lower.c | 25 ----------------------- src/gallium/drivers/ethosu/ethosu_ml.h | 1 - src/gallium/drivers/ethosu/ethosu_sched.c | 12 +++++++++++ 5 files changed, 14 insertions(+), 28 deletions(-) diff --git a/src/gallium/drivers/ethosu/ethosu_cmd.c b/src/gallium/drivers/ethosu/ethosu_cmd.c index f3bd3463e8f..f6a88f7ffd7 100644 --- a/src/gallium/drivers/ethosu/ethosu_cmd.c +++ b/src/gallium/drivers/ethosu/ethosu_cmd.c @@ -206,7 +206,7 @@ emit_kernel(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation stride |= ((operation->kernel.stride_y - 1) >> 1) << 9; stride |= (operation->kernel.dilation_x - 1) << 3; stride |= (operation->kernel.dilation_y - 1) << 4; - stride |= operation->conv.part_kernel_first << 2; + stride |= operation->block_config.is_partkernel << 2; EMIT0(NPU_SET_KERNEL_STRIDE, stride); } diff --git a/src/gallium/drivers/ethosu/ethosu_coefs.c b/src/gallium/drivers/ethosu/ethosu_coefs.c index 25aeeed569f..1c309a376e1 100644 --- a/src/gallium/drivers/ethosu/ethosu_coefs.c +++ b/src/gallium/drivers/ethosu/ethosu_coefs.c @@ -110,7 +110,7 @@ fill_weights(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio input_weights, operation->block_config.ofm_block.depth, operation->kernel.depthwise, - operation->conv.part_kernel_first, + operation->block_config.is_partkernel, 8 /* ifm_bitdepth */, 8 /* decomp_h */, 8 /* decomp_w */, diff --git a/src/gallium/drivers/ethosu/ethosu_lower.c b/src/gallium/drivers/ethosu/ethosu_lower.c index aacf1b96572..3395ca98745 100644 --- a/src/gallium/drivers/ethosu/ethosu_lower.c +++ b/src/gallium/drivers/ethosu/ethosu_lower.c @@ -29,29
+29,6 @@ needed_total_padding(int input_size, int stride, int filter_size) return MAX2(filter_size - (input_size % stride), 0); } -static bool -ethosu_is_part_kernel_first(struct ethosu_operation *operation) -{ - // Determine which block traversal strategy has better DPU utilization - unsigned kernel_size = operation->kernel.height * operation->kernel.width; - unsigned depth = operation->ifm.shape.depth; - float depth_utilization = (float)depth / ethosu_round_up_to_multiple(depth, 32); - float part_kernel_utilization = ((float)depth / ethosu_round_up_to_multiple(depth, 8)); - part_kernel_utilization *= (float)kernel_size / ethosu_round_up_to_multiple(kernel_size, 4); - - if (operation->type != ETHOSU_OPERATION_TYPE_CONVOLUTION) - return false; - - if (operation->kernel.depthwise) - return false; - - // Part-kernel first is always better for ifm depths <= 8 - if (part_kernel_utilization >= depth_utilization || depth <= 8) - return true; - - return false; -} - static void set_feature_maps(struct pipe_tensor *input_tensor, struct pipe_tensor *output_tensor, @@ -162,8 +139,6 @@ ethosu_lower_convolution(struct ethosu_subgraph *subgraph, operation->kernel.zero_points = NULL; } - operation->conv.part_kernel_first = ethosu_is_part_kernel_first(operation); - if (poperation->conv.padding_same) { unsigned vert = needed_total_padding(input_tensor->dims[1], poperation->conv.stride_y, poperation->conv.weight_tensor->dims[1]); unsigned horiz = needed_total_padding(input_tensor->dims[2], poperation->conv.stride_x, poperation->conv.weight_tensor->dims[2]); diff --git a/src/gallium/drivers/ethosu/ethosu_ml.h b/src/gallium/drivers/ethosu/ethosu_ml.h index 9ce81c3290d..8db4adf4ec1 100644 --- a/src/gallium/drivers/ethosu/ethosu_ml.h +++ b/src/gallium/drivers/ethosu/ethosu_ml.h @@ -136,7 +136,6 @@ struct ethosu_operation { struct { struct ethosu_address_range weights; struct ethosu_address_range scales; - bool part_kernel_first; bool depthwise; } conv; diff --git 
a/src/gallium/drivers/ethosu/ethosu_sched.c b/src/gallium/drivers/ethosu/ethosu_sched.c index 5a93d023331..dadc30b79c5 100644 --- a/src/gallium/drivers/ethosu/ethosu_sched.c +++ b/src/gallium/drivers/ethosu/ethosu_sched.c @@ -90,6 +90,17 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope unsigned depth = MAX2(screen->ofm_ublock.depth, MIN2(search_space.depth, ARCH_SPLIT_DEPTH)); + bool is_part_kernel = false; + if (is_convolution) { + unsigned kernel_size = operation->kernel.width * operation->kernel.height; + unsigned ifm_depth = operation->ifm.shape.depth; + float depth_utilization = (float)ifm_depth / (float)ethosu_round_up_to_multiple(ifm_depth, 32); + float part_kernel_utilization = (float)ifm_depth / (float)ethosu_round_up_to_multiple(ifm_depth, 8); + part_kernel_utilization *= (float)kernel_size / (float)ethosu_round_up_to_multiple(kernel_size, 4); + if (!operation->kernel.depthwise && (part_kernel_utilization >= depth_utilization || ifm_depth <= 8)) + is_part_kernel = true; + } + if (depth < operation->ofm.shape.depth) { depth = align(depth, ARCH_SPLIT_DEPTH); } @@ -170,6 +181,7 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope config.ofm_block.width = width; config.ofm_block.depth = depth; config.ofm_ublock = screen->ofm_ublock; + config.is_partkernel = is_part_kernel; best_cost = relative_cost; }