From f9cd399eb0fb174e56a4e8e1e9b7e441e3b5a287 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Sun, 22 Mar 2026 16:12:21 +0100 Subject: [PATCH] ethosu: Fix ublock selection for 8-bit depthwise/pooling on U85-256 For U85-256 with 8-bit IFM, Vela's _uBlockToOpTable restricts which microblocks are valid per operation type: {2,2,8} and {4,1,8}: conv, matmul, vectorprod, reducesum, eltwise, resize {2,1,16}: depthwise, pool, eltwise, reduceminmax, argmax, resize Mesa's find_ublock() was not enforcing these constraints, allowing {4,1,8} or {2,2,8} to be selected for depthwise/pooling based on minimum waste. For depthwise ops with OFM shapes that aligned better to {4,1,8}, the wrong ublock was chosen, causing incorrect weight encoding and NPU hangs. Fix by skipping {4,1,8} and {2,2,8} for depthwise/pooling operations, matching Vela's operation-validity table. Part-of: --- src/gallium/drivers/ethosu/ethosu_sched_u85.c | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/ethosu/ethosu_sched_u85.c b/src/gallium/drivers/ethosu/ethosu_sched_u85.c index a5ad39ef2ec..70bd7b1d1e7 100644 --- a/src/gallium/drivers/ethosu/ethosu_sched_u85.c +++ b/src/gallium/drivers/ethosu/ethosu_sched_u85.c @@ -656,23 +656,31 @@ find_ublock(struct ethosu_operation *operation, bool is_part_kernel) continue; /* Skip 2x1x16 for part-kernel 1x1 */ } - /* The 2x1x16 microblock is only valid for 16-bit IFM convolutions. - * For 8-bit IFM, it can only be used for depthwise/pooling operations. - * This matches the bitsToOperations table in Vela's ethos_u85.cpp. + /* U85-256 ublock-to-operation validity for 8-bit IFM + * (from Vela's _uBlockToOpTable in ethos_u85.cpp): + * + * {2,2,8} / Shape(2,2,8): conv, matmul, vectorprod, reducesum, eltwise, resize + * {4,1,8} / Shape(1,4,8): conv, matmul, vectorprod, reducesum, eltwise, resize + * {2,1,16} / Shape(1,2,16): depthwise, pool, eltwise, reduceminmax, argmax, resize + * + * So for 8-bit IFM: + * - depthwise/pooling can ONLY use {2,1,16} + * - convolution (non-depthwise) CANNOT use {2,1,16} */ + + /* Skip {2,1,16} for 8-bit non-depthwise convolutions */ if (ublk.width == 2 && ublk.height == 1 && ublk.depth == 16 && operation->type == ETHOSU_OPERATION_TYPE_CONVOLUTION && !is_depthwise) { - continue; /* Skip 2x1x16 for 8-bit IFM non-depthwise convolutions */ + continue; } - /* For non-1x1 regular convolutions, skip 2x1x16 unless it's depthwise */ - if (!is_pointwise && !is_depthwise && - operation->type == ETHOSU_OPERATION_TYPE_CONVOLUTION && - ublk.width == 2 && - ublk.height == 1 && - ublk.depth == 16) + /* Skip {4,1,8} and {2,2,8} for 8-bit depthwise/pooling — + * only {2,1,16} is valid for these operations at 8-bit */ + if (!(ublk.width == 2 && ublk.height == 1 && ublk.depth == 16) && + (is_depthwise || is_pooling)) { continue; + } /* Minimum waste is better than aspect correct */ struct ethosu_block tmp = block_round_away(operation->ofm.shape, ublk);