pan/compiler: Add a pan_varying_layout struct
Right now, the varying layout is entirely a series of assumptions made in various places in the driver and compiler. This adds an explicit structure for tracking the layout, which we will eventually plumb through everywhere. Co-authored-by: Lorenzo Rossi <lorenzo.rossi@collabora.com> Reviewed-by: Lorenzo Rossi <lorenzo.rossi@collabora.com> Acked-by: Eric R. Smith <eric.smith@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38681>
This commit is contained in:
parent
84dcdf87bf
commit
1efba676b1
4 changed files with 429 additions and 31 deletions
|
|
@ -209,6 +209,10 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
|
|||
NIR_PASS(_, s, panfrost_nir_lower_res_indices, &inputs);
|
||||
pan_nir_lower_texture_late(s, inputs.gpu_id);
|
||||
|
||||
/* nir_opt_varyings is replacing all flat highp types with float32, we need
|
||||
* to figure out the varying types ourselves */
|
||||
inputs.trust_varying_flat_highp_types = false;
|
||||
|
||||
if (dev->arch >= 9) {
|
||||
inputs.valhall.use_ld_var_buf = panfrost_use_ld_var_buf(s);
|
||||
/* Always enable this for GL, it avoids crashes when using unbound
|
||||
|
|
|
|||
|
|
@ -126,6 +126,11 @@ struct pan_compile_inputs {
|
|||
*/
|
||||
uint32_t fixed_varying_mask;
|
||||
|
||||
/* Optimizations such as nir_opt_varyings can erase all flat types to float; when
|
||||
* this field is false, varying types are inferred from their usage.
|
||||
*/
|
||||
bool trust_varying_flat_highp_types;
|
||||
|
||||
/* Settings to move constants into the FAU. */
|
||||
struct {
|
||||
uint32_t *values;
|
||||
|
|
@ -143,18 +148,170 @@ struct pan_compile_inputs {
|
|||
};
|
||||
};
|
||||
|
||||
enum pan_varying_section {
|
||||
PAN_VARYING_SECTION_POSITION,
|
||||
PAN_VARYING_SECTION_ATTRIBS,
|
||||
/* Varyings computed on-the-fly */
|
||||
PAN_VARYING_SECTION_SPECIAL,
|
||||
PAN_VARYING_SECTION_GENERIC,
|
||||
};
|
||||
|
||||
/* Varyings which go in PAN_VARYING_SECTION_ATTRIBS */
|
||||
#define PAN_ATTRIB_VARYING_BITS \
|
||||
(VARYING_BIT_PSIZ | VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT | \
|
||||
VARYING_BIT_PRIMITIVE_ID)
|
||||
|
||||
/* Varyings which go in PAN_VARYING_SECTION_SPECIAL (Midgard only) */
|
||||
#define PAN_SPECIAL_VARYING_BITS \
|
||||
(VARYING_BIT_PNTC | VARYING_BIT_POS | VARYING_BIT_FACE)
|
||||
|
||||
/* Varyings which DO NOT go in PAN_VARYING_SECTION_GENERIC */
|
||||
#define PAN_HARDWARE_VARYING_BITS \
|
||||
(VARYING_BIT_POS | PAN_ATTRIB_VARYING_BITS | PAN_SPECIAL_VARYING_BITS)
|
||||
|
||||
struct pan_varying_slot {
|
||||
/* GLSL/SPIR-V location of the varying slot */
|
||||
gl_varying_slot location : 7;
|
||||
|
||||
/* Format of the varying slot in memory
|
||||
* (really nir_alu_type, but the compiler screams at you if you don't lie) */
|
||||
unsigned alu_type : 8;
|
||||
unsigned ncomps : 3;
|
||||
|
||||
enum pan_varying_section section : 2;
|
||||
|
||||
/* Offset of the varying slot in the specified section of the varying
|
||||
* buffer. For special VS outputs (see PAN_ATTRIB_VARYING_BITS), this is
|
||||
* relative to the start of the position header. For all other varyings,
|
||||
* this is relative to the start of the varying space. The offset will be
|
||||
* -1 if unknown (before the memory layout is built).
|
||||
*/
|
||||
int offset : 12;
|
||||
};
|
||||
static_assert(sizeof(struct pan_varying_slot) == 4,
|
||||
"This struct has no holes");
|
||||
|
||||
static inline bool
|
||||
pan_varying_slot_is_empty(const struct pan_varying_slot *slot)
|
||||
{
|
||||
return slot->alu_type == nir_type_invalid;
|
||||
}
|
||||
|
||||
enum ENUM_PACKED pan_varying_knowledge {
|
||||
PAN_VARYING_FORMAT_KNOWN = BITFIELD_BIT(0),
|
||||
PAN_VARYING_LAYOUT_KNOWN = BITFIELD_BIT(1),
|
||||
};
|
||||
|
||||
/* Contains information about varyings, both their format and the physical
|
||||
* memory layout. The format is not necessarily what is actually stored in
|
||||
* memory, but what format is in the register before the store_output, or what
|
||||
* the shader expects after a load_input. The layout is optional and specifies
|
||||
* the exact offset in memory of each varying, its section and the size of the
|
||||
* generic buffer. The layout is only built for the Vertex Shader and passed
|
||||
* on to the Fragment Shader if they are linked together, since the struct is
|
||||
* valid even without format or layout information, the "known" field tracks
|
||||
* what information the structure has, before accessing any format information
|
||||
* you should check with `pan_varying_layout_require_format` that it is built
|
||||
* and before accessing any layout information you should check with
|
||||
* pan_varying_layout_require_layout if it is present.
|
||||
*
|
||||
* The format and layout are not split into two different structures to avoid
|
||||
* duplicating indexing information.
|
||||
*
|
||||
* The slots are valid only up to `count`, but can also contain holes if they
|
||||
* have been dead-code-eliminated after `nir_assign_io_var_locations`. Please
|
||||
* use `pan_varying_slot_is_empty` to check if slots are empty. Empty slots are
|
||||
* ignored by finding functions.
|
||||
*/
|
||||
PRAGMA_DIAGNOSTIC_PUSH
|
||||
PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
|
||||
struct pan_varying_layout {
|
||||
uint8_t count;
|
||||
enum pan_varying_knowledge known;
|
||||
/* Size of the generic section, in bytes */
|
||||
uint16_t generic_size_B;
|
||||
|
||||
struct pan_varying_slot slots[PAN_MAX_VARYINGS];
|
||||
};
|
||||
PRAGMA_DIAGNOSTIC_POP
|
||||
|
||||
static inline const struct pan_varying_slot *
|
||||
pan_varying_layout_find_slot(const struct pan_varying_layout *layout,
|
||||
gl_varying_slot location)
|
||||
{
|
||||
for (unsigned i = 0; i < layout->count; i++) {
|
||||
if (layout->slots[i].location != location)
|
||||
continue;
|
||||
const struct pan_varying_slot *slot = &layout->slots[i];
|
||||
if (pan_varying_slot_is_empty(slot))
|
||||
break;
|
||||
return slot;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline const struct pan_varying_slot *
|
||||
pan_varying_layout_slot_at(const struct pan_varying_layout *layout,
|
||||
unsigned index)
|
||||
{
|
||||
if (index >= layout->count)
|
||||
return NULL;
|
||||
|
||||
const struct pan_varying_slot *slot = &layout->slots[index];
|
||||
if (pan_varying_slot_is_empty(slot))
|
||||
return NULL;
|
||||
|
||||
return slot;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
pan_get_fixed_varying_mask(unsigned varyings_used)
|
||||
{
|
||||
return (varyings_used & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
|
||||
~VARYING_BIT_POS & ~PAN_ATTRIB_VARYING_BITS;
|
||||
}
|
||||
|
||||
static inline void
|
||||
pan_varying_layout_require_format(const struct pan_varying_layout *layout)
|
||||
{
|
||||
assert(layout);
|
||||
if (!(layout->known & PAN_VARYING_FORMAT_KNOWN))
|
||||
assert(!"Format is required");
|
||||
}
|
||||
|
||||
static inline void
|
||||
pan_varying_layout_require_layout(const struct pan_varying_layout *layout)
|
||||
{
|
||||
assert(layout);
|
||||
if (!(layout->known & PAN_VARYING_LAYOUT_KNOWN))
|
||||
assert(!"Layout is required");
|
||||
}
|
||||
|
||||
enum pipe_format
|
||||
pan_varying_format(nir_alu_type type, unsigned ncomps);
|
||||
|
||||
/** Builds a varying layout according to the SSO ABI we developed for OpenGL.
|
||||
*
|
||||
* This can be called on either shader stage and the two varying layouts are
|
||||
* guaranteed to match if the same fixed_varyings are passed into both.
|
||||
*/
|
||||
void
|
||||
pan_build_varying_layout_sso_abi(struct pan_varying_layout *layout,
|
||||
nir_shader *nir, unsigned gpu_id,
|
||||
uint32_t fixed_varyings);
|
||||
|
||||
void
|
||||
pan_varying_collect_formats(struct pan_varying_layout *registry,
|
||||
nir_shader *nir, unsigned gpu_id,
|
||||
bool trust_varying_flat_highp_types,
|
||||
bool lower_mediump);
|
||||
|
||||
struct pan_shader_varying {
|
||||
gl_varying_slot location;
|
||||
enum pipe_format format;
|
||||
};
|
||||
|
||||
static inline unsigned
|
||||
pan_get_fixed_varying_mask(unsigned varyings_used)
|
||||
{
|
||||
return (varyings_used & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
|
||||
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
|
||||
}
|
||||
|
||||
struct bifrost_shader_blend_info {
|
||||
nir_alu_type type;
|
||||
uint32_t return_offset;
|
||||
|
|
|
|||
|
|
@ -1,15 +1,17 @@
|
|||
/*
|
||||
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates.
|
||||
* Copyright (C) 2019-2022 Collabora, Ltd.
|
||||
* Copyright (C) 2019-2022,2026 Collabora, Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "pan_nir.h"
|
||||
#include "midgard/midgard_quirks.h"
|
||||
#include "panfrost/model/pan_model.h"
|
||||
|
||||
static enum pipe_format
|
||||
varying_format(nir_alu_type t, unsigned ncomps)
|
||||
enum pipe_format
|
||||
pan_varying_format(nir_alu_type t, unsigned ncomps)
|
||||
{
|
||||
assert(ncomps >= 1 && ncomps <= 4);
|
||||
|
||||
|
|
@ -29,8 +31,10 @@ varying_format(nir_alu_type t, unsigned ncomps)
|
|||
} conv[] = {
|
||||
VARYING_FORMAT(float, 32, FLOAT, 32),
|
||||
VARYING_FORMAT(uint, 32, UINT, 32),
|
||||
VARYING_FORMAT(int, 32, SINT, 32),
|
||||
VARYING_FORMAT(float, 16, FLOAT, 16),
|
||||
VARYING_FORMAT(uint, 16, UINT, 16),
|
||||
VARYING_FORMAT(int, 16, SINT, 16),
|
||||
};
|
||||
#undef VARYING_FORMAT
|
||||
|
||||
|
|
@ -46,20 +50,21 @@ varying_format(nir_alu_type t, unsigned ncomps)
|
|||
|
||||
struct slot_info {
|
||||
nir_alu_type type;
|
||||
bool any_highp;
|
||||
unsigned count;
|
||||
unsigned index;
|
||||
};
|
||||
|
||||
struct walk_varyings_data {
|
||||
struct pan_shader_info *info;
|
||||
bool quirk_no_auto32;
|
||||
struct slot_info *slots;
|
||||
bool trust_varying_flat_highp_types;
|
||||
};
|
||||
|
||||
static bool
|
||||
walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
struct walk_varyings_data *wv_data = data;
|
||||
struct pan_shader_info *info = wv_data->info;
|
||||
struct slot_info *slots = wv_data->slots;
|
||||
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
|
|
@ -67,7 +72,8 @@ walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data)
|
|||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
unsigned count;
|
||||
unsigned size;
|
||||
nir_alu_type type;
|
||||
bool is_store;
|
||||
|
||||
/* Only consider intrinsics that access varyings */
|
||||
switch (intr->intrinsic) {
|
||||
|
|
@ -77,7 +83,8 @@ walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data)
|
|||
return false;
|
||||
|
||||
count = nir_src_num_components(intr->src[0]);
|
||||
size = nir_alu_type_get_type_size(nir_intrinsic_src_type(intr));
|
||||
type = nir_intrinsic_src_type(intr);
|
||||
is_store = true;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_input:
|
||||
|
|
@ -86,7 +93,8 @@ walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data)
|
|||
return false;
|
||||
|
||||
count = intr->def.num_components;
|
||||
size = intr->def.bit_size;
|
||||
type = nir_intrinsic_dest_type(intr);
|
||||
is_store = false;
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
@ -98,21 +106,30 @@ walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data)
|
|||
if (sem.no_varying)
|
||||
return false;
|
||||
|
||||
/* In a fragment shader, flat shading is lowered to load_input but
|
||||
* interpolation is lowered to load_interpolated_input, so we can check
|
||||
* the intrinsic to distinguish.
|
||||
*
|
||||
* In a vertex shader, we consider everything flat, as the information
|
||||
* will not contribute to the final linked varyings -- flatness is used
|
||||
* only to determine the type, and the GL linker uses the type from the
|
||||
* fragment shader instead.
|
||||
*/
|
||||
bool flat = intr->intrinsic != nir_intrinsic_load_interpolated_input;
|
||||
bool auto32 = !info->quirk_no_auto32 && size == 32;
|
||||
nir_alu_type type = (flat && auto32) ? nir_type_uint : nir_type_float;
|
||||
nir_alu_type base_type = nir_alu_type_get_base_type(type);
|
||||
unsigned size = nir_alu_type_get_type_size(type);
|
||||
assert(base_type & (nir_type_int | nir_type_uint | nir_type_float));
|
||||
|
||||
assert(size == 32 || size == 16);
|
||||
type |= size;
|
||||
bool auto32 = !wv_data->quirk_no_auto32 && size == 32;
|
||||
bool untrusted_type = !wv_data->trust_varying_flat_highp_types &&
|
||||
sem.location >= VARYING_SLOT_VAR0 &&
|
||||
!sem.medium_precision &&
|
||||
!b->shader->info.separate_shader;
|
||||
if (untrusted_type) {
|
||||
/* Don't trust the type, varying_opts might have smashed everything
|
||||
* onto floats. Replace all flat varyings with ints and smooth varyings
|
||||
* with floats, only exception is 16-bit flat varyings that should be
|
||||
* stored/loaded as floats as the hardware cannot encode 16-bit flat ints.
|
||||
* Read docs/drivers/panfrost/varyings.rst for details.
|
||||
*/
|
||||
bool is_flat = intr->intrinsic != nir_intrinsic_load_interpolated_input;
|
||||
base_type = (is_flat && auto32) ? nir_type_uint : nir_type_float;
|
||||
type = base_type | size;
|
||||
if (is_store)
|
||||
nir_intrinsic_set_src_type(intr, type);
|
||||
else
|
||||
nir_intrinsic_set_dest_type(intr, type);
|
||||
}
|
||||
|
||||
/* Count currently contains the number of components accessed by this
|
||||
* intrinsic. However, we may be accessing a fractional location,
|
||||
|
|
@ -135,6 +152,9 @@ walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data)
|
|||
slots[location].index = index;
|
||||
}
|
||||
|
||||
if (size == 32 && !sem.medium_precision)
|
||||
slots[location].any_highp = true;
|
||||
|
||||
slots[location].count = MAX2(slots[location].count, count);
|
||||
}
|
||||
|
||||
|
|
@ -184,7 +204,10 @@ pan_nir_collect_varyings(nir_shader *s, struct pan_shader_info *info)
|
|||
return;
|
||||
|
||||
struct slot_info slots[64] = {0};
|
||||
struct walk_varyings_data wv_data = {info, slots};
|
||||
struct walk_varyings_data wv_data = {
|
||||
.quirk_no_auto32 = info->quirk_no_auto32,
|
||||
.slots = slots
|
||||
};
|
||||
nir_shader_instructions_pass(s, walk_varyings, nir_metadata_all, &wv_data);
|
||||
|
||||
struct pan_shader_varying *varyings = (s->info.stage == MESA_SHADER_VERTEX)
|
||||
|
|
@ -197,7 +220,7 @@ pan_nir_collect_varyings(nir_shader *s, struct pan_shader_info *info)
|
|||
if (!slots[i].type)
|
||||
continue;
|
||||
|
||||
enum pipe_format format = varying_format(slots[i].type, slots[i].count);
|
||||
enum pipe_format format = pan_varying_format(slots[i].type, slots[i].count);
|
||||
assert(format != PIPE_FORMAT_NONE);
|
||||
|
||||
unsigned index = slots[i].index;
|
||||
|
|
@ -216,3 +239,215 @@ pan_nir_collect_varyings(nir_shader *s, struct pan_shader_info *info)
|
|||
info->varyings.noperspective =
|
||||
pan_nir_collect_noperspective_varyings_fs(s);
|
||||
}
|
||||
|
||||
/*
|
||||
* ABI: Special (desktop GL) slots come first, tightly packed. General varyings
|
||||
* come later, sparsely packed. This handles both linked and separable shaders
|
||||
* with a common code path, with minimal keying only for desktop GL. Each slot
|
||||
* consumes 16 bytes (TODO: fp16, partial vectors).
|
||||
*
|
||||
* This is a copy+paste of the identical function in bifrost_compile.c
|
||||
*/
|
||||
static unsigned
|
||||
bi_varying_base_bytes(gl_varying_slot slot, uint32_t fixed_varyings)
|
||||
{
|
||||
if (slot >= VARYING_SLOT_VAR0) {
|
||||
unsigned nr_special = util_bitcount(fixed_varyings);
|
||||
unsigned general_index = (slot - VARYING_SLOT_VAR0);
|
||||
|
||||
return 16 * (nr_special + general_index);
|
||||
} else {
|
||||
return 16 * (util_bitcount(fixed_varyings & BITFIELD_MASK(slot)));
|
||||
}
|
||||
}
|
||||
|
||||
static const struct pan_varying_slot hw_varying_slots[] = {{
|
||||
.location = VARYING_SLOT_POS,
|
||||
.alu_type = nir_type_float32,
|
||||
.ncomps = 4,
|
||||
.section = PAN_VARYING_SECTION_POSITION,
|
||||
.offset = 0,
|
||||
}, {
|
||||
.location = VARYING_SLOT_PSIZ,
|
||||
.alu_type = nir_type_float16,
|
||||
.ncomps = 1,
|
||||
.section = PAN_VARYING_SECTION_ATTRIBS,
|
||||
.offset = 0,
|
||||
}, {
|
||||
.location = VARYING_SLOT_LAYER,
|
||||
.alu_type = nir_type_uint8,
|
||||
.ncomps = 1,
|
||||
.section = PAN_VARYING_SECTION_ATTRIBS,
|
||||
.offset = 2,
|
||||
}, {
|
||||
.location = VARYING_SLOT_VIEWPORT,
|
||||
.alu_type = nir_type_uint8,
|
||||
.ncomps = 1,
|
||||
.section = PAN_VARYING_SECTION_ATTRIBS,
|
||||
.offset = 2,
|
||||
}, {
|
||||
.location = VARYING_SLOT_PRIMITIVE_ID,
|
||||
.alu_type = nir_type_uint32,
|
||||
.ncomps = 1,
|
||||
.section = PAN_VARYING_SECTION_ATTRIBS,
|
||||
.offset = 12,
|
||||
}};
|
||||
|
||||
/* On Midgard some attributes are computed on-the-fly from the drawing state,
|
||||
* those are called special and require a custom descriptor definition.
|
||||
* From v6 onwards those use the LD_VAR_SPECIAL instruction.
|
||||
* Also on Midgard, VARYING_SLOT_TEX* might be point coordinates depending on
|
||||
* the rasterizer state, if they are they should be theoretically in the special
|
||||
* section. Since we don't know this yet we "misplace" them in the generic
|
||||
* section anyway, they won't end up in the memory layout and they'll be handled
|
||||
* by the descriptor emitter code.
|
||||
* It's not a mistake, just a "happy little accident".
|
||||
*/
|
||||
static const struct pan_varying_slot special_varying_slots[] = {{
|
||||
.location = VARYING_SLOT_POS,
|
||||
.alu_type = nir_type_float32,
|
||||
.ncomps = 4,
|
||||
.section = PAN_VARYING_SECTION_SPECIAL,
|
||||
.offset = 0,
|
||||
}, {
|
||||
.location = VARYING_SLOT_PNTC,
|
||||
.alu_type = nir_type_float32,
|
||||
.ncomps = 1,
|
||||
.section = PAN_VARYING_SECTION_SPECIAL,
|
||||
.offset = 0,
|
||||
}, {
|
||||
.location = VARYING_SLOT_FACE,
|
||||
.alu_type = nir_type_uint32,
|
||||
.ncomps = 1,
|
||||
.section = PAN_VARYING_SECTION_SPECIAL,
|
||||
.offset = 0,
|
||||
}};
|
||||
|
||||
static struct pan_varying_slot
|
||||
hw_varying_slot(unsigned arch, mesa_shader_stage stage, gl_varying_slot slot)
|
||||
{
|
||||
bool vs_pos = slot == VARYING_SLOT_POS && stage == MESA_SHADER_VERTEX;
|
||||
/* pos is only special in fragment shader input, not vertex shader output */
|
||||
if (arch < 6 && !vs_pos) {
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(special_varying_slots); i++) {
|
||||
if (special_varying_slots[i].location == slot)
|
||||
return special_varying_slots[i];
|
||||
}
|
||||
}
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(hw_varying_slots); i++) {
|
||||
if (hw_varying_slots[i].location == slot)
|
||||
return hw_varying_slots[i];
|
||||
}
|
||||
UNREACHABLE("Invalid HW varying slot");
|
||||
}
|
||||
|
||||
void
|
||||
pan_varying_collect_formats(struct pan_varying_layout *layout, nir_shader *nir,
|
||||
unsigned gpu_id, bool trust_varying_flat_highp_types,
|
||||
bool lower_mediump)
|
||||
{
|
||||
assert(nir->info.stage == MESA_SHADER_VERTEX ||
|
||||
nir->info.stage == MESA_SHADER_FRAGMENT);
|
||||
memset(layout, 0, sizeof(*layout));
|
||||
|
||||
const unsigned gpu_arch = pan_arch(gpu_id);
|
||||
bool quirk_no_auto32 = gpu_arch <= 5 &&
|
||||
(midgard_get_quirks(gpu_id) & MIDGARD_NO_AUTO32);
|
||||
|
||||
struct slot_info slots[64] = {0};
|
||||
struct walk_varyings_data wv_data = {
|
||||
.quirk_no_auto32 = quirk_no_auto32,
|
||||
.slots = slots,
|
||||
.trust_varying_flat_highp_types = trust_varying_flat_highp_types,
|
||||
};
|
||||
|
||||
nir_shader_instructions_pass(nir, walk_varyings, nir_metadata_all, &wv_data);
|
||||
|
||||
unsigned count = 0;
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(slots); i++) {
|
||||
if (!slots[i].type)
|
||||
continue;
|
||||
|
||||
/* It's possible that something has been dead code eliminated between
|
||||
* when the driver locations were set on variables and here. Don't
|
||||
* trust our compaction to match the driver. Just copy over the index
|
||||
* and accept that there's a hole in the mapping.
|
||||
*/
|
||||
unsigned idx = slots[i].index;
|
||||
count = MAX2(count, idx + 1);
|
||||
assert(count <= ARRAY_SIZE(layout->slots));
|
||||
assert(layout->slots[idx].alu_type == nir_type_invalid);
|
||||
|
||||
if (BITFIELD64_BIT(i) & PAN_HARDWARE_VARYING_BITS) {
|
||||
layout->slots[idx] = hw_varying_slot(gpu_arch, nir->info.stage, i);
|
||||
} else {
|
||||
nir_alu_type type = nir_alu_type_get_base_type(slots[i].type);
|
||||
unsigned bit_size = nir_alu_type_get_type_size(slots[i].type);
|
||||
|
||||
/* The Vulkan spec requires types to match across all uses of a
|
||||
* location but doesn't actually require RelaxedPrecision to match
|
||||
* for the whole location. So we can only apply mediump if every use
|
||||
* of the location is mediump.
|
||||
* Don't lower mediump integers, it has no measured impact and causes
|
||||
* lots of bugs due to gallium shenanigans.
|
||||
* Also allow the client to remove mediump lowering and keep the
|
||||
* original types
|
||||
*/
|
||||
bool can_lower_size = lower_mediump &&
|
||||
bit_size == 32 &&
|
||||
type == nir_type_float &&
|
||||
!slots[i].any_highp;
|
||||
if (can_lower_size)
|
||||
bit_size = 16;
|
||||
|
||||
layout->slots[idx] = (struct pan_varying_slot){
|
||||
.location = i,
|
||||
.alu_type = type | bit_size,
|
||||
.ncomps = slots[i].count,
|
||||
.section = PAN_VARYING_SECTION_GENERIC,
|
||||
/* Don't know the offset yet */
|
||||
.offset = -1,
|
||||
};
|
||||
}
|
||||
}
|
||||
layout->count = count;
|
||||
layout->generic_size_B = 0;
|
||||
layout->known |= PAN_VARYING_FORMAT_KNOWN;
|
||||
}
|
||||
|
||||
void
|
||||
pan_build_varying_layout_sso_abi(struct pan_varying_layout *layout,
|
||||
nir_shader *nir, unsigned gpu_id,
|
||||
uint32_t fixed_varyings)
|
||||
{
|
||||
pan_varying_layout_require_format(layout);
|
||||
|
||||
const unsigned gpu_arch = pan_arch(gpu_id);
|
||||
unsigned generic_size_B = 0;
|
||||
for (unsigned i = 0; i < layout->count; i++) {
|
||||
struct pan_varying_slot *slot = &layout->slots[i];
|
||||
if (pan_varying_slot_is_empty(slot))
|
||||
continue;
|
||||
|
||||
if (slot->section != PAN_VARYING_SECTION_GENERIC) {
|
||||
ASSERTED const struct pan_varying_slot hw_slot =
|
||||
hw_varying_slot(gpu_arch, nir->info.stage, slot->location);
|
||||
|
||||
assert(memcmp(slot, &hw_slot, sizeof(*slot)) == 0);
|
||||
} else {
|
||||
unsigned offset =
|
||||
bi_varying_base_bytes(slot->location, fixed_varyings);
|
||||
assert(offset < (1 << 11));
|
||||
|
||||
const unsigned bit_size = nir_alu_type_get_type_size(slot->alu_type);
|
||||
const unsigned size = slot->ncomps * (bit_size / 8);
|
||||
generic_size_B = MAX2(generic_size_B, offset + size);
|
||||
|
||||
assert(slot->offset == -1);
|
||||
assert(offset < 4096);
|
||||
slot->offset = offset;
|
||||
}
|
||||
}
|
||||
layout->generic_size_B = generic_size_B;
|
||||
layout->known |= PAN_VARYING_LAYOUT_KNOWN;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1345,6 +1345,8 @@ panvk_compile_shader(struct panvk_device *dev,
|
|||
nir_assign_io_var_locations(nir, nir_var_shader_out);
|
||||
panvk_lower_nir_io(nir);
|
||||
|
||||
inputs.trust_varying_flat_highp_types = true;
|
||||
|
||||
variant->own_bin = true;
|
||||
|
||||
result = panvk_compile_nir(dev, nir, info->flags, &inputs, state,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue