pan/compiler: Add a pan_varying_layout struct

Right now, the varying layout is entirely a series of assumptions made
in various places in the driver and compiler.  This adds an explicit
structure for tracking the layout, which we will eventually plumb through
everywhere.

Co-authored-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Reviewed-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Acked-by: Eric R. Smith <eric.smith@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38681>
This commit is contained in:
Faith Ekstrand 2025-12-03 13:26:37 -05:00 committed by Marge Bot
parent 84dcdf87bf
commit 1efba676b1
4 changed files with 429 additions and 31 deletions

View file

@ -209,6 +209,10 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
NIR_PASS(_, s, panfrost_nir_lower_res_indices, &inputs);
pan_nir_lower_texture_late(s, inputs.gpu_id);
/* nir_opt_varyings is replacing all flat highp types with float32, we need
* to figure out the varying types ourselves */
inputs.trust_varying_flat_highp_types = false;
if (dev->arch >= 9) {
inputs.valhall.use_ld_var_buf = panfrost_use_ld_var_buf(s);
/* Always enable this for GL, it avoids crashes when using unbound

View file

@ -126,6 +126,11 @@ struct pan_compile_inputs {
*/
uint32_t fixed_varying_mask;
/* Optimizations such as nir_opt_varyings can erase all flat types to float.
* When this field is false, varying types are inferred from their usage.
*/
bool trust_varying_flat_highp_types;
/* Settings to move constants into the FAU. */
struct {
uint32_t *values;
@ -143,18 +148,170 @@ struct pan_compile_inputs {
};
};
/* Region of the varying space that a slot is placed in. The enumerators are
 * numbered explicitly because struct pan_varying_slot stores this value in a
 * 2-bit field, so only the values 0..3 are representable.
 */
enum pan_varying_section {
   PAN_VARYING_SECTION_POSITION = 0,
   PAN_VARYING_SECTION_ATTRIBS = 1,
   /* Varyings computed on-the-fly */
   PAN_VARYING_SECTION_SPECIAL = 2,
   PAN_VARYING_SECTION_GENERIC = 3,
};
/* Varyings which go in PAN_VARYING_SECTION_ATTRIBS */
#define PAN_ATTRIB_VARYING_BITS \
(VARYING_BIT_PSIZ | VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT | \
VARYING_BIT_PRIMITIVE_ID)
/* Varyings which go in PAN_VARYING_SECTION_SPECIAL (Midgard only) */
#define PAN_SPECIAL_VARYING_BITS \
(VARYING_BIT_PNTC | VARYING_BIT_POS | VARYING_BIT_FACE)
/* Varyings which DO NOT go in PAN_VARYING_SECTION_GENERIC.
 * This is the union of the position bit and the two masks above.
 * VARYING_BIT_POS is contributed twice (directly and through
 * PAN_SPECIAL_VARYING_BITS), which is harmless for a bitwise OR.
 */
#define PAN_HARDWARE_VARYING_BITS \
(VARYING_BIT_POS | PAN_ATTRIB_VARYING_BITS | PAN_SPECIAL_VARYING_BITS)
/* Description of a single varying slot, packed into 32 bits
 * (7 + 8 + 3 + 2 + 12); the static_assert below enforces the size.
 */
struct pan_varying_slot {
   /* GLSL/SPIR-V location of the varying slot */
   gl_varying_slot location : 7;

   /* Format of the varying slot in memory
    * (really nir_alu_type, but the compiler screams at you if you don't lie) */
   unsigned alu_type : 8;

   /* Number of components of the slot */
   unsigned ncomps : 3;

   /* Section of the varying space this slot is placed in */
   enum pan_varying_section section : 2;

   /* Offset of the varying slot in the specified section of the varying
    * buffer. For special VS outputs (see PAN_ATTRIB_VARYING_BITS), this is
    * relative to the start of the position header. For all other varyings,
    * this is relative to the start of the varying space. The offset will be
    * -1 if unknown (before the memory layout is built).
    *
    * Explicitly `signed`: whether a plain `int` bit-field is signed is
    * implementation-defined in C, and the -1 sentinel only round-trips
    * through a signed field. The representable range is -2048..2047.
    */
   signed int offset : 12;
};
static_assert(sizeof(struct pan_varying_slot) == 4,
              "This struct has no holes");
/* Tells whether `slot` is a hole, i.e. carries no varying.
 * A slot is a hole exactly when its alu_type is nir_type_invalid.
 */
static inline bool
pan_varying_slot_is_empty(const struct pan_varying_slot *slot)
{
   const bool has_type = slot->alu_type != nir_type_invalid;

   return !has_type;
}
/* Bit flags recording which parts of a struct pan_varying_layout have been
 * filled in. The flags are independent bits and are OR-ed into the layout's
 * `known` field.
 */
enum ENUM_PACKED pan_varying_knowledge {
/* Set by pan_varying_collect_formats once slot formats are collected */
PAN_VARYING_FORMAT_KNOWN = BITFIELD_BIT(0),
/* Set by pan_build_varying_layout_sso_abi once offsets are assigned */
PAN_VARYING_LAYOUT_KNOWN = BITFIELD_BIT(1),
};
/* Contains information about varyings, both their format and the physical
* memory layout.
*
* The format is not necessarily what is actually stored in memory, but what
* format is in the register before the store_output, or what the shader
* expects after a load_input.
*
* The layout is optional and specifies the exact offset in memory of each
* varying, its section and the size of the generic buffer. The layout is only
* built for the Vertex Shader and passed on to the Fragment Shader if they are
* linked together.
*
* Since the struct is valid even without format or layout information, the
* "known" field tracks what information the structure has. Before accessing
* any format information you should check with
* `pan_varying_layout_require_format` that it is built, and before accessing
* any layout information you should check with
* `pan_varying_layout_require_layout` that it is present.
*
* The format and layout are not split into two different structures to avoid
* duplicating indexing information.
*
* The slots are valid only up to `count`, but can also contain holes if they
* have been dead-code-eliminated after `nir_assign_io_var_locations`. Please
* use `pan_varying_slot_is_empty` to check if slots are empty. Empty slots are
* ignored by finding functions.
*/
/* -Wpadded is promoted to an error around the struct so that any padding
 * inserted by the compiler fails the build. */
PRAGMA_DIAGNOSTIC_PUSH
PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
struct pan_varying_layout {
/* Number of leading entries of `slots` that are meaningful (holes included) */
uint8_t count;
/* Mask of pan_varying_knowledge flags describing what has been filled in */
enum pan_varying_knowledge known;
/* Size of the generic section, in bytes */
uint16_t generic_size_B;
struct pan_varying_slot slots[PAN_MAX_VARYINGS];
};
PRAGMA_DIAGNOSTIC_POP
/* Looks up the slot that describes `location`.
 *
 * Only the first layout->count entries are searched and empty slots (holes)
 * are ignored. Returns a pointer into layout->slots, or NULL when no
 * non-empty slot has the requested location.
 */
static inline const struct pan_varying_slot *
pan_varying_layout_find_slot(const struct pan_varying_layout *layout,
                             gl_varying_slot location)
{
   for (unsigned i = 0; i < layout->count; i++) {
      const struct pan_varying_slot *slot = &layout->slots[i];

      /* A hole must be skipped, never treated as a terminating match: a
       * zero-filled hole reports location 0 and would otherwise hide a real
       * slot for location 0 stored later in the array.
       */
      if (pan_varying_slot_is_empty(slot))
         continue;

      if (slot->location == location)
         return slot;
   }

   return NULL;
}
/* Returns the slot stored at `index`, or NULL when `index` is not below
 * layout->count or the slot at that index is empty.
 */
static inline const struct pan_varying_slot *
pan_varying_layout_slot_at(const struct pan_varying_layout *layout,
                           unsigned index)
{
   const bool in_range = index < layout->count;

   if (in_range && !pan_varying_slot_is_empty(&layout->slots[index]))
      return &layout->slots[index];

   return NULL;
}
/* Filters `varyings_used` down to the bits selected by
 * BITFIELD_MASK(VARYING_SLOT_VAR0), with the position bit and every bit of
 * PAN_ATTRIB_VARYING_BITS cleared.
 */
static inline uint32_t
pan_get_fixed_varying_mask(unsigned varyings_used)
{
   const unsigned below_var0 =
      varyings_used & BITFIELD_MASK(VARYING_SLOT_VAR0);

   return below_var0 & ~(VARYING_BIT_POS | PAN_ATTRIB_VARYING_BITS);
}
/* Debug check that `layout` is non-NULL and has PAN_VARYING_FORMAT_KNOWN set.
 * Implemented with assert(), so it does nothing when asserts are compiled out.
 */
static inline void
pan_varying_layout_require_format(const struct pan_varying_layout *layout)
{
   assert(layout);

   const bool has_format = (layout->known & PAN_VARYING_FORMAT_KNOWN) != 0;
   if (has_format)
      return;

   assert(!"Format is required");
}
/* Debug check that `layout` is non-NULL and has PAN_VARYING_LAYOUT_KNOWN set.
 * Implemented with assert(), so it does nothing when asserts are compiled out.
 */
static inline void
pan_varying_layout_require_layout(const struct pan_varying_layout *layout)
{
   assert(layout);

   const bool has_layout = (layout->known & PAN_VARYING_LAYOUT_KNOWN) != 0;
   if (has_layout)
      return;

   assert(!"Layout is required");
}
/* Maps a NIR ALU type and a component count to the pipe_format used for a
 * varying. The implementation asserts that ncomps is between 1 and 4.
 * NOTE(review): the value returned for an unsupported type is not visible
 * here; callers assert the result is not PIPE_FORMAT_NONE -- confirm.
 */
enum pipe_format
pan_varying_format(nir_alu_type type, unsigned ncomps);
/** Builds a varying layout according to the SSO ABI we developed for OpenGL.
*
* This can be called on either shader stage and the two varying layouts are
* guaranteed to match if the same fixed_varyings are passed into both.
*
* `layout` must already have its format information
* (PAN_VARYING_FORMAT_KNOWN). On return the generic slots have their offset
* assigned, generic_size_B is set and PAN_VARYING_LAYOUT_KNOWN is added to
* `known`.
*/
void
pan_build_varying_layout_sso_abi(struct pan_varying_layout *layout,
nir_shader *nir, unsigned gpu_id,
uint32_t fixed_varyings);
/* Walks the varying accesses of a vertex or fragment shader and records the
 * format of every slot in `layout`.
 *
 * `layout` is cleared first; on return PAN_VARYING_FORMAT_KNOWN is set, the
 * offsets of generic slots are still unknown (-1) and generic_size_B is 0.
 *
 * When `trust_varying_flat_highp_types` is false, varying types are inferred
 * from how the varyings are used instead of being taken from the intrinsics.
 * When `lower_mediump` is true, 32-bit float slots with no highp use are
 * recorded as 16-bit.
 */
void
pan_varying_collect_formats(struct pan_varying_layout *layout,
                            nir_shader *nir, unsigned gpu_id,
                            bool trust_varying_flat_highp_types,
                            bool lower_mediump);
/* Pairs a varying location with the format chosen for it. */
struct pan_shader_varying {
/* GLSL/SPIR-V location of the varying */
gl_varying_slot location;
/* Format of the varying */
enum pipe_format format;
};
/* Filters `varyings_used` down to the bits selected by
 * BITFIELD_MASK(VARYING_SLOT_VAR0), with VARYING_BIT_POS and VARYING_BIT_PSIZ
 * cleared.
 *
 * NOTE(review): a function with the same name is also defined earlier in
 * this view, returning uint32_t and clearing all of PAN_ATTRIB_VARYING_BITS
 * instead of only VARYING_BIT_PSIZ. This copy looks like the pre-change
 * version left over from the diff -- confirm that only one definition
 * survives in the real header.
 */
static inline unsigned
pan_get_fixed_varying_mask(unsigned varyings_used)
{
return (varyings_used & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
}
struct bifrost_shader_blend_info {
nir_alu_type type;
uint32_t return_offset;

View file

@ -1,15 +1,17 @@
/*
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates.
* Copyright (C) 2019-2022 Collabora, Ltd.
* Copyright (C) 2019-2022,2026 Collabora, Ltd.
* SPDX-License-Identifier: MIT
*/
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "pan_nir.h"
#include "midgard/midgard_quirks.h"
#include "panfrost/model/pan_model.h"
static enum pipe_format
varying_format(nir_alu_type t, unsigned ncomps)
enum pipe_format
pan_varying_format(nir_alu_type t, unsigned ncomps)
{
assert(ncomps >= 1 && ncomps <= 4);
@ -29,8 +31,10 @@ varying_format(nir_alu_type t, unsigned ncomps)
} conv[] = {
VARYING_FORMAT(float, 32, FLOAT, 32),
VARYING_FORMAT(uint, 32, UINT, 32),
VARYING_FORMAT(int, 32, SINT, 32),
VARYING_FORMAT(float, 16, FLOAT, 16),
VARYING_FORMAT(uint, 16, UINT, 16),
VARYING_FORMAT(int, 16, SINT, 16),
};
#undef VARYING_FORMAT
@ -46,20 +50,21 @@ varying_format(nir_alu_type t, unsigned ncomps)
/* Per-location information accumulated while walking varying accesses. */
struct slot_info {
/* Type recorded for the location; a zero value means the location is unused */
nir_alu_type type;
/* Set when any access is 32-bit and not marked medium precision */
bool any_highp;
/* Largest component count seen across all accesses of the location */
unsigned count;
/* Index recorded for the location; used as the position in the slot array */
unsigned index;
};
/* State handed to walk_varyings through the instruction pass' data pointer.
 * NOTE(review): this view interleaves old and new diff lines; `info` may be
 * the removed field that `quirk_no_auto32` replaces -- confirm against the
 * real file.
 */
struct walk_varyings_data {
struct pan_shader_info *info;
/* When set, 32-bit accesses are not given the automatic 32-bit treatment */
bool quirk_no_auto32;
/* Per-location accumulator array, indexed by varying location */
struct slot_info *slots;
/* When false, the types of generic highp varyings are re-derived from usage */
bool trust_varying_flat_highp_types;
};
static bool
walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data)
{
struct walk_varyings_data *wv_data = data;
struct pan_shader_info *info = wv_data->info;
struct slot_info *slots = wv_data->slots;
if (instr->type != nir_instr_type_intrinsic)
@ -67,7 +72,8 @@ walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data)
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
unsigned count;
unsigned size;
nir_alu_type type;
bool is_store;
/* Only consider intrinsics that access varyings */
switch (intr->intrinsic) {
@ -77,7 +83,8 @@ walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data)
return false;
count = nir_src_num_components(intr->src[0]);
size = nir_alu_type_get_type_size(nir_intrinsic_src_type(intr));
type = nir_intrinsic_src_type(intr);
is_store = true;
break;
case nir_intrinsic_load_input:
@ -86,7 +93,8 @@ walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data)
return false;
count = intr->def.num_components;
size = intr->def.bit_size;
type = nir_intrinsic_dest_type(intr);
is_store = false;
break;
default:
@ -98,21 +106,30 @@ walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data)
if (sem.no_varying)
return false;
/* In a fragment shader, flat shading is lowered to load_input but
* interpolation is lowered to load_interpolated_input, so we can check
* the intrinsic to distinguish.
*
* In a vertex shader, we consider everything flat, as the information
* will not contribute to the final linked varyings -- flatness is used
* only to determine the type, and the GL linker uses the type from the
* fragment shader instead.
*/
bool flat = intr->intrinsic != nir_intrinsic_load_interpolated_input;
bool auto32 = !info->quirk_no_auto32 && size == 32;
nir_alu_type type = (flat && auto32) ? nir_type_uint : nir_type_float;
nir_alu_type base_type = nir_alu_type_get_base_type(type);
unsigned size = nir_alu_type_get_type_size(type);
assert(base_type & (nir_type_int | nir_type_uint | nir_type_float));
assert(size == 32 || size == 16);
type |= size;
bool auto32 = !wv_data->quirk_no_auto32 && size == 32;
bool untrusted_type = !wv_data->trust_varying_flat_highp_types &&
sem.location >= VARYING_SLOT_VAR0 &&
!sem.medium_precision &&
!b->shader->info.separate_shader;
if (untrusted_type) {
/* Don't trust the type, varying_opts might have smashed everything
* onto floats. Replace all flat varyings with ints and smooth varyings
* with floats, only exception is 16-bit flat varyings that should be
* stored/loaded as ints as the hardware cannot encode 16-bit flat ints.
* Read docs/drivers/panfrost/varyings.rst for details.
*/
bool is_flat = intr->intrinsic != nir_intrinsic_load_interpolated_input;
base_type = (is_flat && auto32) ? nir_type_uint : nir_type_float;
type = base_type | size;
if (is_store)
nir_intrinsic_set_src_type(intr, type);
else
nir_intrinsic_set_dest_type(intr, type);
}
/* Count currently contains the number of components accessed by this
* intrinsics. However, we may be accessing a fractional location,
@ -135,6 +152,9 @@ walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data)
slots[location].index = index;
}
if (size == 32 && !sem.medium_precision)
slots[location].any_highp = true;
slots[location].count = MAX2(slots[location].count, count);
}
@ -184,7 +204,10 @@ pan_nir_collect_varyings(nir_shader *s, struct pan_shader_info *info)
return;
struct slot_info slots[64] = {0};
struct walk_varyings_data wv_data = {info, slots};
struct walk_varyings_data wv_data = {
.quirk_no_auto32 = info->quirk_no_auto32,
.slots = slots
};
nir_shader_instructions_pass(s, walk_varyings, nir_metadata_all, &wv_data);
struct pan_shader_varying *varyings = (s->info.stage == MESA_SHADER_VERTEX)
@ -197,7 +220,7 @@ pan_nir_collect_varyings(nir_shader *s, struct pan_shader_info *info)
if (!slots[i].type)
continue;
enum pipe_format format = varying_format(slots[i].type, slots[i].count);
enum pipe_format format = pan_varying_format(slots[i].type, slots[i].count);
assert(format != PIPE_FORMAT_NONE);
unsigned index = slots[i].index;
@ -216,3 +239,215 @@ pan_nir_collect_varyings(nir_shader *s, struct pan_shader_info *info)
info->varyings.noperspective =
pan_nir_collect_noperspective_varyings_fs(s);
}
/*
 * ABI: Special (desktop GL) slots come first, tightly packed. General varyings
 * come later, sparsely packed. This handles both linked and separable shaders
 * with a common code path, with minimal keying only for desktop GL. Each slot
 * consumes 16 bytes (TODO: fp16, partial vectors).
 *
 * This is a copy+paste of the identical function in bifrost_compile.c
 *
 * Returns the byte offset of `slot`: the number of 16-byte units that precede
 * it, times 16. `fixed_varyings` is the mask of special slots in use.
 */
static unsigned
bi_varying_base_bytes(gl_varying_slot slot, uint32_t fixed_varyings)
{
   const unsigned slot_stride_B = 16;
   unsigned units_before;

   if (slot < VARYING_SLOT_VAR0) {
      /* Special slot: only the special slots below it come first */
      units_before = util_bitcount(fixed_varyings & BITFIELD_MASK(slot));
   } else {
      /* General slot: all special slots, then the general slots below it */
      const unsigned general_index = slot - VARYING_SLOT_VAR0;
      units_before = util_bitcount(fixed_varyings) + general_index;
   }

   return slot_stride_B * units_before;
}
/* Fixed descriptions of the hardware varyings: position plus the varyings
 * that live in the attribs section. Looked up by location in
 * hw_varying_slot().
 * NOTE(review): LAYER and VIEWPORT are both listed at offset 2 --
 * presumably intentional, confirm against the hardware layout.
 */
static const struct pan_varying_slot hw_varying_slots[] = {
   /* Position: vec4 of 32-bit floats */
   {
      .location = VARYING_SLOT_POS,
      .alu_type = nir_type_float32,
      .ncomps = 4,
      .section = PAN_VARYING_SECTION_POSITION,
      .offset = 0,
   },
   /* Point size: a single 16-bit float */
   {
      .location = VARYING_SLOT_PSIZ,
      .alu_type = nir_type_float16,
      .ncomps = 1,
      .section = PAN_VARYING_SECTION_ATTRIBS,
      .offset = 0,
   },
   /* Layer: a single 8-bit unsigned integer */
   {
      .location = VARYING_SLOT_LAYER,
      .alu_type = nir_type_uint8,
      .ncomps = 1,
      .section = PAN_VARYING_SECTION_ATTRIBS,
      .offset = 2,
   },
   /* Viewport: a single 8-bit unsigned integer */
   {
      .location = VARYING_SLOT_VIEWPORT,
      .alu_type = nir_type_uint8,
      .ncomps = 1,
      .section = PAN_VARYING_SECTION_ATTRIBS,
      .offset = 2,
   },
   /* Primitive ID: a single 32-bit unsigned integer */
   {
      .location = VARYING_SLOT_PRIMITIVE_ID,
      .alu_type = nir_type_uint32,
      .ncomps = 1,
      .section = PAN_VARYING_SECTION_ATTRIBS,
      .offset = 12,
   },
};
/* On Midgard some attributes are computed on-the-fly from the drawing state,
 * those are called special and require a custom descriptor definition.
 * From v6 onwards those use the LD_VAR_SPECIAL instruction.
 *
 * Also on Midgard, VARYING_SLOT_TEX* might be point coordinates depending on
 * the rasterizer state; if they are, they should theoretically be in the
 * special section. Since we don't know this yet we "misplace" them in the
 * generic section anyway: they won't end up in the memory layout and they'll
 * be handled by the descriptor emitter code.
 * It's not a mistake, just a "happy little accident".
 *
 * Every entry of this table is listed at offset 0.
 */
static const struct pan_varying_slot special_varying_slots[] = {
   /* Position as a special varying: vec4 of 32-bit floats */
   {
      .location = VARYING_SLOT_POS,
      .alu_type = nir_type_float32,
      .ncomps = 4,
      .section = PAN_VARYING_SECTION_SPECIAL,
      .offset = 0,
   },
   /* Point coordinate: 32-bit float */
   {
      .location = VARYING_SLOT_PNTC,
      .alu_type = nir_type_float32,
      .ncomps = 1,
      .section = PAN_VARYING_SECTION_SPECIAL,
      .offset = 0,
   },
   /* Face: 32-bit unsigned integer */
   {
      .location = VARYING_SLOT_FACE,
      .alu_type = nir_type_uint32,
      .ncomps = 1,
      .section = PAN_VARYING_SECTION_SPECIAL,
      .offset = 0,
   },
};
/* Returns the fixed description of the hardware varying `slot`.
 *
 * For arch < 6 the special table is consulted first, except for the position
 * written by a vertex shader: pos is only special in fragment shader input,
 * not vertex shader output. Otherwise, or when the special table has no
 * entry, the regular hardware table is used. A slot found in neither table
 * is a programming error.
 */
static struct pan_varying_slot
hw_varying_slot(unsigned arch, mesa_shader_stage stage, gl_varying_slot slot)
{
   const bool is_vs_position =
      stage == MESA_SHADER_VERTEX && slot == VARYING_SLOT_POS;
   const bool try_special = arch < 6 && !is_vs_position;

   if (try_special) {
      const struct pan_varying_slot *end =
         special_varying_slots + ARRAY_SIZE(special_varying_slots);

      for (const struct pan_varying_slot *s = special_varying_slots; s != end;
           s++) {
         if (s->location == slot)
            return *s;
      }
   }

   const struct pan_varying_slot *hw_end =
      hw_varying_slots + ARRAY_SIZE(hw_varying_slots);

   for (const struct pan_varying_slot *s = hw_varying_slots; s != hw_end; s++) {
      if (s->location == slot)
         return *s;
   }

   UNREACHABLE("Invalid HW varying slot");
}
/* Collects the format of every varying accessed by a vertex or fragment
 * shader into `layout`.
 *
 * `layout` is zeroed first. Hardware varyings get their fixed description
 * from hw_varying_slot(); all other varyings land in the generic section with
 * an unknown offset (-1). On return PAN_VARYING_FORMAT_KNOWN is set and
 * generic_size_B is 0.
 */
void
pan_varying_collect_formats(struct pan_varying_layout *layout, nir_shader *nir,
                            unsigned gpu_id, bool trust_varying_flat_highp_types,
                            bool lower_mediump)
{
   assert(nir->info.stage == MESA_SHADER_VERTEX ||
          nir->info.stage == MESA_SHADER_FRAGMENT);

   memset(layout, 0, sizeof(*layout));

   const unsigned arch = pan_arch(gpu_id);
   const bool no_auto32 =
      arch <= 5 && (midgard_get_quirks(gpu_id) & MIDGARD_NO_AUTO32);

   /* Gather per-location information from every varying access */
   struct slot_info infos[64] = {0};
   struct walk_varyings_data walk = {
      .quirk_no_auto32 = no_auto32,
      .slots = infos,
      .trust_varying_flat_highp_types = trust_varying_flat_highp_types,
   };
   nir_shader_instructions_pass(nir, walk_varyings, nir_metadata_all, &walk);

   unsigned slot_count = 0;
   for (unsigned loc = 0; loc < ARRAY_SIZE(infos); loc++) {
      const struct slot_info *info = &infos[loc];

      /* Location never accessed */
      if (!info->type)
         continue;

      /* It's possible that something has been dead code eliminated between
       * when the driver locations were set on variables and here. Don't
       * trust our compaction to match the driver. Just copy over the index
       * and accept that there's a hole in the mapping.
       */
      const unsigned idx = info->index;
      if (slot_count < idx + 1)
         slot_count = idx + 1;
      assert(slot_count <= ARRAY_SIZE(layout->slots));
      assert(layout->slots[idx].alu_type == nir_type_invalid);

      const bool is_hw = (BITFIELD64_BIT(loc) & PAN_HARDWARE_VARYING_BITS) != 0;
      if (is_hw) {
         layout->slots[idx] = hw_varying_slot(arch, nir->info.stage, loc);
         continue;
      }

      const nir_alu_type base = nir_alu_type_get_base_type(info->type);
      unsigned bits = nir_alu_type_get_type_size(info->type);

      /* The Vulkan spec requires types to match across all uses of a
       * location but doesn't actually require RelaxedPrecision to match
       * for the whole location. So we can only apply mediump if every use
       * of the location is mediump.
       * Don't lower mediump integers, it has no measured impact and causes
       * lots of bugs due to gallium shenanigans.
       * Also allow the client to remove mediump lowering and keep the
       * original types
       */
      if (lower_mediump && bits == 32 && base == nir_type_float &&
          !info->any_highp)
         bits = 16;

      layout->slots[idx] = (struct pan_varying_slot){
         .location = loc,
         .alu_type = base | bits,
         .ncomps = info->count,
         .section = PAN_VARYING_SECTION_GENERIC,
         /* Don't know the offset yet */
         .offset = -1,
      };
   }

   layout->count = slot_count;
   layout->generic_size_B = 0;
   layout->known |= PAN_VARYING_FORMAT_KNOWN;
}
/* Assigns memory offsets to the generic varyings of `layout` following the
 * SSO ABI (see bi_varying_base_bytes).
 *
 * `layout` must already carry format information. Hardware slots keep their
 * fixed description, which is only cross-checked in debug builds. Every
 * non-empty generic slot must still have an unknown offset (-1) on entry. On
 * return generic_size_B covers the furthest byte used by a generic slot and
 * PAN_VARYING_LAYOUT_KNOWN is set.
 */
void
pan_build_varying_layout_sso_abi(struct pan_varying_layout *layout,
                                 nir_shader *nir, unsigned gpu_id,
                                 uint32_t fixed_varyings)
{
   pan_varying_layout_require_format(layout);

   const unsigned gpu_arch = pan_arch(gpu_id);
   unsigned generic_size_B = 0;

   for (unsigned i = 0; i < layout->count; i++) {
      struct pan_varying_slot *slot = &layout->slots[i];
      if (pan_varying_slot_is_empty(slot))
         continue;

      if (slot->section != PAN_VARYING_SECTION_GENERIC) {
         /* Hardware slots were fully described when formats were collected */
         ASSERTED const struct pan_varying_slot hw_slot =
            hw_varying_slot(gpu_arch, nir->info.stage, slot->location);
         assert(memcmp(slot, &hw_slot, sizeof(*slot)) == 0);
         continue;
      }

      const unsigned offset =
         bi_varying_base_bytes(slot->location, fixed_varyings);

      /* slot->offset is a signed 12-bit field, so the largest offset it can
       * hold is 2047. This is the only bound that matters; a looser check
       * against 4096 would let unrepresentable values through.
       */
      assert(offset < (1 << 11));

      const unsigned bit_size = nir_alu_type_get_type_size(slot->alu_type);
      const unsigned size = slot->ncomps * (bit_size / 8);
      generic_size_B = MAX2(generic_size_B, offset + size);

      /* The offset must not have been assigned before */
      assert(slot->offset == -1);
      slot->offset = offset;
   }

   layout->generic_size_B = generic_size_B;
   layout->known |= PAN_VARYING_LAYOUT_KNOWN;
}

View file

@ -1345,6 +1345,8 @@ panvk_compile_shader(struct panvk_device *dev,
nir_assign_io_var_locations(nir, nir_var_shader_out);
panvk_lower_nir_io(nir);
inputs.trust_varying_flat_highp_types = true;
variant->own_bin = true;
result = panvk_compile_nir(dev, nir, info->flags, &inputs, state,