ethosu: Compute is_partkernel during scheduling

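The weight encoder needs this flag, so compute it in the scheduler and
store it in the block config instead of in the convolution lowering.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39611>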
2026-02-17 09:17:58 +01:00 · 2026-02-17 09:17:58 +01:00 · 410d74e078
commit 410d74e078
parent 3ade0a4dd6
5 changed files with 14 additions and 28 deletions
--- a/src/gallium/drivers/ethosu/ethosu_cmd.c
+++ b/src/gallium/drivers/ethosu/ethosu_cmd.c
@@ -206,7 +206,7 @@ emit_kernel(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation
   stride |= ((operation->kernel.stride_y - 1) >> 1) << 9;
   stride |= (operation->kernel.dilation_x - 1) << 3;
   stride |= (operation->kernel.dilation_y - 1) << 4;
-   stride |= operation->conv.part_kernel_first << 2;
+   stride |= operation->block_config.is_partkernel << 2;
   EMIT0(NPU_SET_KERNEL_STRIDE, stride);
 }

--- a/src/gallium/drivers/ethosu/ethosu_coefs.c
+++ b/src/gallium/drivers/ethosu/ethosu_coefs.c
@@ -110,7 +110,7 @@ fill_weights(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio
      input_weights,
      operation->block_config.ofm_block.depth,
      operation->kernel.depthwise,
-      operation->conv.part_kernel_first,
+      operation->block_config.is_partkernel,
      8 /* ifm_bitdepth */,
      8 /* decomp_h */,
      8 /* decomp_w */,
--- a/src/gallium/drivers/ethosu/ethosu_lower.c
+++ b/src/gallium/drivers/ethosu/ethosu_lower.c
@@ -29,29 +29,6 @@ needed_total_padding(int input_size, int stride, int filter_size)
   return MAX2(filter_size - (input_size % stride), 0);
 }

-static bool
-ethosu_is_part_kernel_first(struct ethosu_operation *operation)
-{
-   // Determine which block traversal strategy has better DPU utilization
-   unsigned kernel_size = operation->kernel.height * operation->kernel.width;
-   unsigned depth = operation->ifm.shape.depth;
-   float depth_utilization = (float)depth / ethosu_round_up_to_multiple(depth, 32);
-   float part_kernel_utilization = ((float)depth / ethosu_round_up_to_multiple(depth, 8));
-   part_kernel_utilization *= (float)kernel_size / ethosu_round_up_to_multiple(kernel_size, 4);
-
-   if (operation->type != ETHOSU_OPERATION_TYPE_CONVOLUTION)
-      return false;
-
-   if (operation->kernel.depthwise)
-      return false;
-
-   // Part-kernel first is always better for ifm depths <= 8
-   if (part_kernel_utilization >= depth_utilization || depth <= 8)
-      return true;
-
-   return false;
-}
-
 static void
 set_feature_maps(struct pipe_tensor *input_tensor,
                 struct pipe_tensor *output_tensor,
@@ -162,8 +139,6 @@ ethosu_lower_convolution(struct ethosu_subgraph *subgraph,
      operation->kernel.zero_points = NULL;
   }

-   operation->conv.part_kernel_first = ethosu_is_part_kernel_first(operation);
-
   if (poperation->conv.padding_same) {
      unsigned vert = needed_total_padding(input_tensor->dims[1], poperation->conv.stride_y, poperation->conv.weight_tensor->dims[1]);
      unsigned horiz = needed_total_padding(input_tensor->dims[2], poperation->conv.stride_x, poperation->conv.weight_tensor->dims[2]);
--- a/src/gallium/drivers/ethosu/ethosu_ml.h
+++ b/src/gallium/drivers/ethosu/ethosu_ml.h
@@ -136,7 +136,6 @@ struct ethosu_operation {
      struct {
         struct ethosu_address_range weights;
         struct ethosu_address_range scales;
-         bool part_kernel_first;
         bool depthwise;
      } conv;

--- a/src/gallium/drivers/ethosu/ethosu_sched.c
+++ b/src/gallium/drivers/ethosu/ethosu_sched.c
@@ -90,6 +90,17 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope

   unsigned depth = MAX2(screen->ofm_ublock.depth, MIN2(search_space.depth, ARCH_SPLIT_DEPTH));

+   bool is_part_kernel = false;
+   if (is_convolution) {
+      unsigned kernel_size = operation->kernel.width * operation->kernel.height;
+      unsigned ifm_depth = operation->ifm.shape.depth;
+      float depth_utilization = (float)ifm_depth / (float)ethosu_round_up_to_multiple(ifm_depth, 32);
+      float part_kernel_utilization = (float)ifm_depth / (float)ethosu_round_up_to_multiple(ifm_depth, 8);
+      part_kernel_utilization *= (float)kernel_size / (float)ethosu_round_up_to_multiple(kernel_size, 4);
+      if (!operation->kernel.depthwise && (part_kernel_utilization >= depth_utilization || ifm_depth <= 8))
+         is_part_kernel = true;
+   }
+
   if (depth < operation->ofm.shape.depth) {
      depth = align(depth, ARCH_SPLIT_DEPTH);
   }
@@ -170,6 +181,7 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope
                     config.ofm_block.width = width;
                     config.ofm_block.depth = depth;
                     config.ofm_ublock = screen->ofm_ublock;
+                     config.is_partkernel = is_part_kernel;

                     best_cost = relative_cost;
                  }