ac,radeonsi: move guardband computations to common code

Added a comment from Marek Olsak explaining this.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40249>
This commit is contained in:
Samuel Pitoiset 2026-03-05 17:15:35 +01:00 committed by Marge Bot
parent 2ca7d93519
commit bcccd49368
5 changed files with 240 additions and 105 deletions

View file

@ -0,0 +1,107 @@
/*
* Copyright 2012 Advanced Micro Devices, Inc.
* Copyright 2026 Valve Corporation
*
* SPDX-License-Identifier: MIT
*/
#include "amd_family.h"
#include "ac_gpu_info.h"
#include "ac_guardband.h"
/**
 * Compute the guardband clip/discard factors and the hardware screen offset
 * for a viewport that is described by its scissor rectangle.
 *
 * info                   GPU description; gfx_level and se_tile_repeat select
 *                        the alignment and the maximum of the screen offset.
 * minx, miny, maxx, maxy Viewport bounds expressed as a scissor rectangle.
 * quant_mode             Vertex quantization mode; it indexes the table of
 *                        maximum viewport sizes.
 * clip_discard_distance  Point size / line width; half of it (in clip space)
 *                        is added to the discard rectangle.
 * guardband              Output structure; every field is written.
 */
void
ac_compute_guardband(const struct radeon_info *info, int minx, int miny,
                     int maxx, int maxy, enum ac_quant_mode quant_mode,
                     float clip_discard_distance, struct ac_guardband *guardband)
{
   float left, top, right, bottom, max_range, guardband_x, guardband_y;
   float scale[2], translate[2];

   /* Indexed by quantization modes. Read-only, hence const. */
   static const int max_viewport_size[] = {65536, 16384, 4096};

   /* Validate the table index before the first lookup, so that an invalid
    * mode trips the assertion instead of reading past the end of the table.
    */
   assert(quant_mode < ARRAY_SIZE(max_viewport_size));

   /* Determine the optimal hardware screen offset to center the viewport
    * within the viewport range in order to maximize the guardband size.
    */
   int hw_screen_offset_x = (maxx + minx) / 2;
   int hw_screen_offset_y = (maxy + miny) / 2;

   /* GFX6-GFX7 need to align the offset to an ubertile consisting of all SEs. */
   const unsigned hw_screen_offset_alignment =
      info->gfx_level >= GFX11 ? 32 :
      info->gfx_level >= GFX8 ? 16 : MAX2(info->se_tile_repeat, 16);
   const unsigned max_hw_screen_offset = info->gfx_level >= GFX12 ? 32768 : 8176;

   /* Ensure that the whole viewport stays representable in absolute
    * coordinates.
    */
   assert(maxx <= max_viewport_size[quant_mode] &&
          maxy <= max_viewport_size[quant_mode]);

   hw_screen_offset_x = CLAMP(hw_screen_offset_x, 0, max_hw_screen_offset);
   hw_screen_offset_y = CLAMP(hw_screen_offset_y, 0, max_hw_screen_offset);

   /* Align the screen offset by dropping the low bits. */
   hw_screen_offset_x &= ~(hw_screen_offset_alignment - 1);
   hw_screen_offset_y &= ~(hw_screen_offset_alignment - 1);

   /* Apply the offset to center the viewport and maximize the guardband. */
   minx -= hw_screen_offset_x;
   maxx -= hw_screen_offset_x;
   miny -= hw_screen_offset_y;
   maxy -= hw_screen_offset_y;

   /* Reconstruct the viewport transformation from the scissor. */
   translate[0] = (minx + maxx) / 2.0;
   translate[1] = (miny + maxy) / 2.0;
   scale[0] = maxx - translate[0];
   scale[1] = maxy - translate[1];

   /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
   if (minx == maxx)
      scale[0] = 0.5;
   if (miny == maxy)
      scale[1] = 0.5;

   /* Find the biggest guard band that is inside the supported viewport range.
    * The guard band is specified as a horizontal and vertical distance from
    * (0,0) in clip space.
    *
    * This is done by applying the inverse viewport transformation on the
    * viewport limits to get those limits in clip space.
    *
    * The viewport range is [-max_viewport_size/2 - 1, max_viewport_size/2].
    * (-1 to the min coord because max_viewport_size is odd and ViewportBounds
    * Min/Max are -32768, 32767).
    *
    * NOTE(review): the table entries above are even numbers, so the "odd"
    * wording and the quoted bounds do not literally match the range computed
    * here; confirm the intended limits against the hardware documentation.
    */
   max_range = max_viewport_size[quant_mode] / 2;
   left = (-max_range - 1 - translate[0]) / scale[0];
   right = (max_range - translate[0]) / scale[0];
   top = (-max_range - 1 - translate[1]) / scale[1];
   bottom = (max_range - translate[1]) / scale[1];

   assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);

   /* The same factor applies to both sides of an axis, so take the smaller
    * of the two distances.
    */
   guardband_x = MIN2(-left, right);
   guardband_y = MIN2(-top, bottom);

   float discard_x = 1.0;
   float discard_y = 1.0;

   /* Add half the point size / line width */
   discard_x += clip_discard_distance / (2.0 * scale[0]);
   discard_y += clip_discard_distance / (2.0 * scale[1]);

   /* Discard primitives that would lie entirely outside the viewport area. */
   discard_x = MIN2(discard_x, guardband_x);
   discard_y = MIN2(discard_y, guardband_y);

   guardband->clip_x = guardband_x;
   guardband->clip_y = guardband_y;
   guardband->discard_x = discard_x;
   guardband->discard_y = discard_y;
   guardband->hw_screen_offset_x = hw_screen_offset_x;
   guardband->hw_screen_offset_y = hw_screen_offset_y;
}

View file

@ -0,0 +1,110 @@
/*
* Copyright 2012 Advanced Micro Devices, Inc.
* Copyright 2026 Valve Corporation
*
* SPDX-License-Identifier: MIT
*/
#ifndef AC_GUARDBAND_H
#define AC_GUARDBAND_H
/*
* The discard X/Y fields determine clip-space X and Y distance from (0, 0)
* that defines the rectangle boundary of the visible viewport range area in
* clip space for the purpose of culling primitives outside the viewport.
* Normally, triangles would set this to 1, which means exactly the edge of
* the viewport, while points and lines would set it to 1 + the half point
* size or half line width because point and line culling is done against the
* point or line center, respectively, which can be slightly outside the
* visible range of the viewport while the edge of the point or line can be
* visible. That prevents points and lines from popping suddenly into view
* when their center enters the visible part of the viewport range. It's
* possible to set the discard X/Y fields to values very far into the
* non-visible range of the viewport (> 1) to essentially disable culling of
* primitives outside the visible range, but that's never useful. The discard
* X/Y fields only cause primitives completely outside the rectangle boundary
* to be culled, but those primitives that are only partially outside that
* area are kept, i.e. it only determines culling, not clipping.
*
* The clip X/Y fields determine clip-space X and Y distance from (0, 0) that
* defines the rectangle boundary of the area in clip space where clipping
* must absolutely occur. This should be set to the maximum area of the total
* viewport range including all invisible space. The purpose of this boundary
* is to prevent primitives that are partially outside the viewport range
* (e.g. [-32K, 32K]) from being forwarded to the rasterizer because the
* rasterizer can't represent positions outside the viewport range since it
* can (typically) only accept 16-bit integer positions in screen space, which
* is what really determines the viewport range limits.
*
* Here is an example of how both rectangles should be set for an 8K
* (8192x8192) viewport:
*
* -32K clip X/Y area +32K (ideally the same as the viewport range)
* --------------------------------
* | G U A R D B A N D |
* | discard X/Y area |
* | ------------ |
* | | visible | |
* | | viewport | |
* | | | |
* | ------------ |
* | -4K +4K |
* | |
* --------------------------------
*
*
* Since clipping is slow because it uses floating-point math to shift vertices
* and potentially generate extra primitives, the clipping optimization works
* as follows:
* If a primitive is fully outside the discard rectangle, it's culled.
* ("discard" means cull everything outside)
* If a primitive is partially inside and partially outside the discard X/Y
* rectangle, but fully inside the clip X/Y rectangle, it's kept. This is
* beneficial because the rasterizer can trivially skip pixels outside the
* visible viewport, but it can only accept primitives inside the viewport range
* (typically [-32K, 32K]). If a primitive is partially inside the discard X/Y
* rectangle (i.e. partially visible) and also partially outside the clip X/Y
* rectangle, it must be clipped because the rasterizer can't accept it (it
* overflows the 16-bit integer space). This is the only time when clipping must
* occur (potentially generating new primitives). The goal of the driver is to
* program the discard X/Y area as small as possible and the clip X/Y area as
* large as possible to make sure that this is very unlikely to happen.
* In this example, the discard X/Y fields are set to (1, 1), and the clip X/Y
* fields are set to (8, 8). The band outside the discard X/Y rectangle
* boundary and inside the clip X/Y rectangle boundary is called the guard band,
* and is used as a clipping optimization described above. In the example, the
* 8K viewport is centered in the viewport range by setting
* PA_SU_HARDWARE_SCREEN_OFFSET=(4K, 4K), which makes the size of the guard band
* on all sides equal. Centering the viewport is part of the clipping
* optimization because the discard X/Y and clip X/Y fields apply to both sides
* (left and right, top and bottom) and we want to maximize the clip X/Y values.
* If the viewport wasn't centered, we would have to program the fields to the
* minimum values of both sides.
*/
struct radeon_info;
/* Vertex position quantization modes.
 *
 * The numeric values are significant: the small prim precision computation
 * depends on them, ac_compute_guardband() uses them as an index into its
 * table of maximum viewport sizes, and drivers add them to the register value
 * of the first mode when programming the QUANT_MODE field.
 */
enum ac_quant_mode
{
   AC_QUANT_MODE_16_8_FIXED_POINT_1_256TH = 0,
   AC_QUANT_MODE_14_10_FIXED_POINT_1_1024TH = 1,
   AC_QUANT_MODE_12_12_FIXED_POINT_1_4096TH = 2,
};
/* Result of ac_compute_guardband(). */
struct ac_guardband {
   /* Clip-space X/Y distance from (0, 0) of the rectangle outside of which
    * clipping must occur.
    */
   float clip_x, clip_y;

   /* Clip-space X/Y distance from (0, 0) of the rectangle used for culling
    * primitives that are completely outside of it.
    */
   float discard_x, discard_y;

   /* Screen offset that centers the viewport within the viewport range. */
   int hw_screen_offset_x, hw_screen_offset_y;
};
/*
 * Compute the guardband for a viewport given as a scissor rectangle
 * (minx, miny, maxx, maxy).
 *
 * The function picks a hardware screen offset that centers the viewport
 * (clamped and aligned according to info->gfx_level), then derives the clip
 * and discard distances in clip space for the given quantization mode.
 * clip_discard_distance is the point size / line width; half of it is added
 * to the discard distance. All fields of *guardband are written.
 */
void
ac_compute_guardband(const struct radeon_info *info, int minx, int miny,
int maxx, int maxy, enum ac_quant_mode quant_mode,
float clip_discard_distance, struct ac_guardband *guardband);
#endif /* AC_GUARDBAND_H */

View file

@ -85,6 +85,8 @@ amd_common_files = files(
'ac_shader_args.h',
'ac_shader_util.c',
'ac_shader_util.h',
'ac_guardband.c',
'ac_guardband.h',
'ac_gather_context_rolls.c',
'ac_gpu_info.c',
'ac_gpu_info.h',

View file

@ -20,6 +20,7 @@
#include "util/log.h"
#include "ac_cmdbuf.h"
#include "ac_descriptors.h"
#include "ac_guardband.h"
#include "ac_sqtt.h"
#include "ac_spm.h"
#include "si_perfetto.h"
@ -747,20 +748,12 @@ struct si_framebuffer {
bool gfx12_has_hiz;
};
enum si_quant_mode
{
/* The small prim precision computation depends on the enum values to be like this. */
SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH,
SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH,
SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH,
};
struct si_signed_scissor {
int minx;
int miny;
int maxx;
int maxy;
enum si_quant_mode quant_mode;
enum ac_quant_mode quant_mode;
};
struct si_viewports {

View file

@ -247,8 +247,7 @@ static void si_emit_guardband(struct si_context *sctx, unsigned index)
{
const struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
struct si_signed_scissor vp_as_scissor;
struct pipe_viewport_state vp;
float left, top, right, bottom, max_range, guardband_x, guardband_y;
struct ac_guardband guardband;
if (sctx->vs_writes_viewport_index) {
/* Shaders can draw to any viewport. Make a union of all
@ -266,96 +265,20 @@ static void si_emit_guardband(struct si_context *sctx, unsigned index)
* how large the viewport is. Assume the worst case.
*/
if (sctx->vs_disables_clipping_viewport)
vp_as_scissor.quant_mode = SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH;
vp_as_scissor.quant_mode = AC_QUANT_MODE_16_8_FIXED_POINT_1_256TH;
/* Determine the optimal hardware screen offset to center the viewport
* within the viewport range in order to maximize the guardband size.
*/
int hw_screen_offset_x = (vp_as_scissor.maxx + vp_as_scissor.minx) / 2;
int hw_screen_offset_y = (vp_as_scissor.maxy + vp_as_scissor.miny) / 2;
/* GFX6-GFX7 need to align the offset to an ubertile consisting of all SEs. */
const unsigned hw_screen_offset_alignment =
sctx->gfx_level >= GFX11 ? 32 :
sctx->gfx_level >= GFX8 ? 16 : MAX2(sctx->screen->info.se_tile_repeat, 16);
const unsigned max_hw_screen_offset = sctx->gfx_level >= GFX12 ? 32768 : 8176;
/* Indexed by quantization modes */
static int max_viewport_size[] = {65536, 16384, 4096};
/* Ensure that the whole viewport stays representable in
* absolute coordinates.
* See comment in si_set_viewport_states.
*/
assert(vp_as_scissor.maxx <= max_viewport_size[vp_as_scissor.quant_mode] &&
vp_as_scissor.maxy <= max_viewport_size[vp_as_scissor.quant_mode]);
hw_screen_offset_x = CLAMP(hw_screen_offset_x, 0, max_hw_screen_offset);
hw_screen_offset_y = CLAMP(hw_screen_offset_y, 0, max_hw_screen_offset);
/* Align the screen offset by dropping the low bits. */
hw_screen_offset_x &= ~(hw_screen_offset_alignment - 1);
hw_screen_offset_y &= ~(hw_screen_offset_alignment - 1);
/* Apply the offset to center the viewport and maximize the guardband. */
vp_as_scissor.minx -= hw_screen_offset_x;
vp_as_scissor.maxx -= hw_screen_offset_x;
vp_as_scissor.miny -= hw_screen_offset_y;
vp_as_scissor.maxy -= hw_screen_offset_y;
/* Reconstruct the viewport transformation from the scissor. */
vp.translate[0] = (vp_as_scissor.minx + vp_as_scissor.maxx) / 2.0;
vp.translate[1] = (vp_as_scissor.miny + vp_as_scissor.maxy) / 2.0;
vp.scale[0] = vp_as_scissor.maxx - vp.translate[0];
vp.scale[1] = vp_as_scissor.maxy - vp.translate[1];
/* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
if (vp_as_scissor.minx == vp_as_scissor.maxx)
vp.scale[0] = 0.5;
if (vp_as_scissor.miny == vp_as_scissor.maxy)
vp.scale[1] = 0.5;
/* Find the biggest guard band that is inside the supported viewport
* range. The guard band is specified as a horizontal and vertical
* distance from (0,0) in clip space.
*
* This is done by applying the inverse viewport transformation
* on the viewport limits to get those limits in clip space.
*
* The viewport range is [-max_viewport_size/2 - 1, max_viewport_size/2].
* (-1 to the min coord because max_viewport_size is odd and ViewportBounds
* Min/Max are -32768, 32767).
*/
assert(vp_as_scissor.quant_mode < ARRAY_SIZE(max_viewport_size));
max_range = max_viewport_size[vp_as_scissor.quant_mode] / 2;
left = (-max_range - 1 - vp.translate[0]) / vp.scale[0];
right = (max_range - vp.translate[0]) / vp.scale[0];
top = (-max_range - 1 - vp.translate[1]) / vp.scale[1];
bottom = (max_range - vp.translate[1]) / vp.scale[1];
assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);
guardband_x = MIN2(-left, right);
guardband_y = MIN2(-top, bottom);
float discard_x = 1.0;
float discard_y = 1.0;
float distance = sctx->current_clip_discard_distance;
/* Add half the point size / line width */
discard_x += distance / (2.0 * vp.scale[0]);
discard_y += distance / (2.0 * vp.scale[1]);
/* Discard primitives that would lie entirely outside the viewport area. */
discard_x = MIN2(discard_x, guardband_x);
discard_y = MIN2(discard_y, guardband_y);
ac_compute_guardband(&sctx->screen->info, vp_as_scissor.minx, vp_as_scissor.miny,
vp_as_scissor.maxx, vp_as_scissor.maxy,
vp_as_scissor.quant_mode, sctx->current_clip_discard_distance,
&guardband);
unsigned pa_su_vtx_cntl = S_028BE4_PIX_CENTER(rs->half_pixel_center) |
S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) |
S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH +
vp_as_scissor.quant_mode);
unsigned pa_su_hardware_screen_offset = S_028234_HW_SCREEN_OFFSET_X(hw_screen_offset_x >> 4) |
S_028234_HW_SCREEN_OFFSET_Y(hw_screen_offset_y >> 4);
unsigned pa_su_hardware_screen_offset =
S_028234_HW_SCREEN_OFFSET_X(guardband.hw_screen_offset_x >> 4) |
S_028234_HW_SCREEN_OFFSET_Y(guardband.hw_screen_offset_y >> 4);
/* If any of the GB registers is updated, all of them must be updated.
* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, R_028BEC_PA_CL_GB_VERT_DISC_ADJ
@ -368,8 +291,8 @@ static void si_emit_guardband(struct si_context *sctx, unsigned index)
pa_su_vtx_cntl);
gfx12_opt_set_context_reg4(R_02842C_PA_CL_GB_VERT_CLIP_ADJ,
AC_TRACKED_PA_CL_GB_VERT_CLIP_ADJ,
fui(guardband_y), fui(discard_y),
fui(guardband_x), fui(discard_x));
fui(guardband.clip_y), fui(guardband.discard_y),
fui(guardband.clip_x), fui(guardband.discard_x));
gfx12_opt_set_context_reg(R_028234_PA_SU_HARDWARE_SCREEN_OFFSET,
AC_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
pa_su_hardware_screen_offset);
@ -382,8 +305,8 @@ static void si_emit_guardband(struct si_context *sctx, unsigned index)
pa_su_vtx_cntl);
gfx11_opt_set_context_reg4(R_028BE8_PA_CL_GB_VERT_CLIP_ADJ,
AC_TRACKED_PA_CL_GB_VERT_CLIP_ADJ,
fui(guardband_y), fui(discard_y),
fui(guardband_x), fui(discard_x));
fui(guardband.clip_y), fui(guardband.discard_y),
fui(guardband.clip_x), fui(guardband.discard_x));
gfx11_opt_set_context_reg(R_028234_PA_SU_HARDWARE_SCREEN_OFFSET,
AC_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
pa_su_hardware_screen_offset);
@ -393,8 +316,8 @@ static void si_emit_guardband(struct si_context *sctx, unsigned index)
radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg5(R_028BE4_PA_SU_VTX_CNTL, AC_TRACKED_PA_SU_VTX_CNTL,
pa_su_vtx_cntl,
fui(guardband_y), fui(discard_y),
fui(guardband_x), fui(discard_x));
fui(guardband.clip_y), fui(guardband.discard_y),
fui(guardband.clip_x), fui(guardband.discard_x));
radeon_opt_set_context_reg(R_028234_PA_SU_HARDWARE_SCREEN_OFFSET,
AC_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
pa_su_hardware_screen_offset);
@ -477,11 +400,11 @@ static void si_set_viewport_states(struct pipe_context *pctx, unsigned start_slo
*/
if (max_corner <= 1024) /* 4K scanline area for guardband */
scissor->quant_mode = SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH;
scissor->quant_mode = AC_QUANT_MODE_12_12_FIXED_POINT_1_4096TH;
else if (max_corner <= 4096) /* 16K scanline area for guardband */
scissor->quant_mode = SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH;
scissor->quant_mode = AC_QUANT_MODE_14_10_FIXED_POINT_1_1024TH;
else /* 64K scanline area for guardband */
scissor->quant_mode = SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH;
scissor->quant_mode = AC_QUANT_MODE_16_8_FIXED_POINT_1_256TH;
}
if (start_slot == 0) {
@ -786,5 +709,5 @@ void si_init_viewport_functions(struct si_context *ctx)
ctx->b.set_window_rectangles = si_set_window_rectangles;
for (unsigned i = 0; i < 16; i++)
ctx->viewports.as_scissor[i].quant_mode = SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH;
ctx->viewports.as_scissor[i].quant_mode = AC_QUANT_MODE_16_8_FIXED_POINT_1_256TH;
}