diff --git a/src/amd/common/ac_guardband.c b/src/amd/common/ac_guardband.c
new file mode 100644
index 00000000000..5c3fa9cb894
--- /dev/null
+++ b/src/amd/common/ac_guardband.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ * Copyright 2026 Valve Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "amd_family.h"
+
+#include "ac_gpu_info.h"
+#include "ac_guardband.h"
+
+/**
+ * Compute the guardband for a viewport given as a scissor rectangle.
+ *
+ * The rectangle is first re-centered with a hardware screen offset to
+ * maximize the guardband; the clip and discard distances are then derived
+ * in clip space from the viewport range of the quantization mode.
+ *
+ * \param info                   GPU info; gfx_level and se_tile_repeat are read.
+ * \param minx, miny, maxx, maxy Bounds of the viewport as a scissor.
+ * \param quant_mode             Quantization mode selecting the viewport range.
+ * \param clip_discard_distance  Point size / line width; half of it is added
+ *                               to the discard distance.
+ * \param guardband              Output: clip and discard distances plus the
+ *                               hardware screen offset that was used.
+ */
+void
+ac_compute_guardband(const struct radeon_info *info, int minx, int miny,
+                     int maxx, int maxy, enum ac_quant_mode quant_mode,
+                     float clip_discard_distance, struct ac_guardband *guardband)
+{
+   float left, top, right, bottom, max_range, guardband_x, guardband_y;
+   float scale[2], translate[2];
+
+   /* Determine the optimal hardware screen offset to center the viewport
+    * within the viewport range in order to maximize the guardband size.
+    */
+   int hw_screen_offset_x = (maxx + minx) / 2;
+   int hw_screen_offset_y = (maxy + miny) / 2;
+
+   /* GFX6-GFX7 need to align the offset to an ubertile consisting of all SEs. */
+   const unsigned hw_screen_offset_alignment =
+      info->gfx_level >= GFX11 ? 32 :
+      info->gfx_level >= GFX8 ? 16 : MAX2(info->se_tile_repeat, 16);
+   const unsigned max_hw_screen_offset = info->gfx_level >= GFX12 ? 32768 : 8176;
+
+   /* Indexed by quantization modes */
+   static const int max_viewport_size[] = {65536, 16384, 4096};
+
+   /* Validate the mode before it is used to index the table. */
+   assert(quant_mode < ARRAY_SIZE(max_viewport_size));
+
+   /* Ensure that the whole viewport stays representable in absolute
+    * coordinates.
+    */
+   assert(maxx <= max_viewport_size[quant_mode] &&
+          maxy <= max_viewport_size[quant_mode]);
+
+   hw_screen_offset_x = CLAMP(hw_screen_offset_x, 0, max_hw_screen_offset);
+   hw_screen_offset_y = CLAMP(hw_screen_offset_y, 0, max_hw_screen_offset);
+
+   /* Align the screen offset by dropping the low bits. */
+   hw_screen_offset_x &= ~(hw_screen_offset_alignment - 1);
+   hw_screen_offset_y &= ~(hw_screen_offset_alignment - 1);
+
+   /* Apply the offset to center the viewport and maximize the guardband. */
+   minx -= hw_screen_offset_x;
+   maxx -= hw_screen_offset_x;
+   miny -= hw_screen_offset_y;
+   maxy -= hw_screen_offset_y;
+
+   /* Reconstruct the viewport transformation from the scissor. */
+   translate[0] = (minx + maxx) / 2.0;
+   translate[1] = (miny + maxy) / 2.0;
+   scale[0] = maxx - translate[0];
+   scale[1] = maxy - translate[1];
+
+   /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
+   if (minx == maxx)
+      scale[0] = 0.5;
+   if (miny == maxy)
+      scale[1] = 0.5;
+
+   /* Find the biggest guard band that is inside the supported viewport range.
+    * The guard band is specified as a horizontal and vertical distance from
+    * (0,0) in clip space.
+    *
+    * This is done by applying the inverse viewport transformation on the
+    * viewport limits to get those limits in clip space.
+    *
+    * The viewport range is [-max_viewport_size/2 - 1, max_viewport_size/2].
+    * (-1 to the min coord because max_viewport_size is odd and ViewportBounds
+    * Min/Max are -32768, 32767).
+    */
+   max_range = max_viewport_size[quant_mode] / 2;
+   left = (-max_range - 1 - translate[0]) / scale[0];
+   right = (max_range - translate[0]) / scale[0];
+   top = (-max_range - 1 - translate[1]) / scale[1];
+   bottom = (max_range - translate[1]) / scale[1];
+
+   assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);
+
+   guardband_x = MIN2(-left, right);
+   guardband_y = MIN2(-top, bottom);
+
+   float discard_x = 1.0;
+   float discard_y = 1.0;
+
+   /* Add half the point size / line width */
+   discard_x += clip_discard_distance / (2.0 * scale[0]);
+   discard_y += clip_discard_distance / (2.0 * scale[1]);
+
+   /* Discard primitives that would lie entirely outside the viewport area. */
+   discard_x = MIN2(discard_x, guardband_x);
+   discard_y = MIN2(discard_y, guardband_y);
+
+   guardband->clip_x = guardband_x;
+   guardband->clip_y = guardband_y;
+   guardband->discard_x = discard_x;
+   guardband->discard_y = discard_y;
+   guardband->hw_screen_offset_x = hw_screen_offset_x;
+   guardband->hw_screen_offset_y = hw_screen_offset_y;
+}
diff --git a/src/amd/common/ac_guardband.h b/src/amd/common/ac_guardband.h
new file mode 100644
index 00000000000..5819cef84f5
--- /dev/null
+++ b/src/amd/common/ac_guardband.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ * Copyright 2026 Valve Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef AC_GUARDBAND_H
+#define AC_GUARDBAND_H
+
+/*
+ * The discard X/Y fields determine clip-space X and Y distance from (0, 0)
+ * that defines the rectangle boundary of the visible viewport range area in
+ * clip space for the purpose of culling primitives outside the viewport.
+ * Normally, triangles would set this to 1, which means exactly the edge of
+ * the viewport, while points and lines would set it to 1 + the half point
+ * size or half line width because point and line culling is done against the
+ * point or line center, respectively, which can be slightly outside the
+ * visible range of the viewport while the edge of the point or line can be
+ * visible. That prevents points and lines from popping suddenly into view
+ * when their center enters the visible part of the viewport range. It's
+ * possible to set the discard X/Y fields to values very far into the
+ * non-visible range of the viewport (> 1) to essentially disable culling of
+ * primitives outside the visible range, but that's never useful. The discard
+ * X/Y fields only cause primitives completely outside the rectangle boundary
+ * to be culled, but those primitives that are only partially outside that
+ * area are kept, i.e. it only determines culling, not clipping.
+ *
+ * The clip X/Y fields determine clip-space X and Y distance from (0, 0) that
+ * defines the rectangle boundary of the area in clip space where clipping
+ * must absolutely occur. This should be set to the maximum area of the total
+ * viewport range including all invisible space. The purpose of this boundary
+ * is to prevent primitives that are partially outside the viewport range
+ * (e.g. [-32K, 32K]) from being forwarded to the rasterizer because the
+ * rasterizer can't represent positions outside the viewport range since it
+ * can (typically) only accept 16-bit integer positions in screen space, which
+ * is what really determines the viewport range limits.
+ *
+ * Here is an example of how both rectangles should be set for an 8K
+ * (8192x8192) viewport:
+ *
+ *     -32K      clip X/Y area      +32K (ideally the same as the viewport range)
+ *       --------------------------------
+ *       |      G U A R D B A N D       |
+ *       |       discard X/Y area       |
+ *       |         ------------         |
+ *       |         | visible  |         |
+ *       |         | viewport |         |
+ *       |         |          |         |
+ *       |         ------------         |
+ *       |        -4K        +4K        |
+ *       |                              |
+ *       --------------------------------
+ *
+ *
+ * Since clipping is slow because it uses floating-point math to shift vertices
+ * and potentially generate extra primitives, the clipping optimization works
+ * as follows:
+ *
+ * If a primitive is fully outside the discard rectangle, it's culled.
+ * ("discard" means cull everything outside)
+ * If a primitive is partially inside and partially outside the discard X/Y
+ * rectangle, but fully inside the clip X/Y rectangle, it's kept. This is
+ * beneficial because the rasterizer can trivially skip pixels outside the
+ * visible viewport, but it can only accept primitives inside the viewport range
+ * (typically [-32K, 32K]). If a primitive is partially inside the discard X/Y
+ * rectangle (i.e. partially visible) and also partially outside the clip X/Y
+ * rectangle, it must be clipped because the rasterizer can't accept it (it
+ * overflows the 16-bit integer space). This is the only time when clipping must
+ * occur (potentially generating new primitives). The goal of the driver is to
+ * program the discard X/Y area as small as possible and the clip X/Y area as
+ * large as possible to make sure that this is very unlikely to happen.
+ *
+ * In this example, the discard X/Y fields are set to (1, 1), and the clip X/Y
+ * fields are set to (8, 8). The band outside the discard X/Y rectangle
+ * boundary and inside the clip X/Y rectangle boundary is called the guard band,
+ * and is used as a clipping optimization described above. In the example, the
+ * 8K viewport is centered in the viewport range by setting
+ * PA_SU_HARDWARE_SCREEN_OFFSET=(4K, 4K), which makes the size of the guard band
+ * on all sides equal. Centering the viewport is part of the clipping
+ * optimization because the discard X/Y and clip X/Y fields apply to both sides
+ * (left and right, top and bottom) and we want to maximize the clip X/Y values.
+ * If the viewport wasn't centered, we would have to program the fields to the
+ * minimum values of both sides.
+ */
+struct radeon_info;
+
+enum ac_quant_mode
+{
+   /* The small prim precision computation depends on the enum values to be like this. */
+   AC_QUANT_MODE_16_8_FIXED_POINT_1_256TH,   /* 64K viewport range */
+   AC_QUANT_MODE_14_10_FIXED_POINT_1_1024TH, /* 16K viewport range */
+   AC_QUANT_MODE_12_12_FIXED_POINT_1_4096TH, /* 4K viewport range */
+};
+
+struct ac_guardband {
+   float clip_x;           /* clip distance from (0, 0) in clip space, X axis */
+   float clip_y;           /* clip distance from (0, 0) in clip space, Y axis */
+   float discard_x;        /* discard distance from (0, 0) in clip space, X axis */
+   float discard_y;        /* discard distance from (0, 0) in clip space, Y axis */
+   int hw_screen_offset_x; /* screen offset used to center the viewport, X axis */
+   int hw_screen_offset_y; /* screen offset used to center the viewport, Y axis */
+};
+
+void
+ac_compute_guardband(const struct radeon_info *info, int minx, int miny,
+                     int maxx, int maxy, enum ac_quant_mode quant_mode,
+                     float clip_discard_distance, struct ac_guardband *guardband);
+
+#endif /* AC_GUARDBAND_H */
diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build
index f9d8cd85db3..62094eaa7fe 100644
--- a/src/amd/common/meson.build
+++ b/src/amd/common/meson.build
@@ -85,6 +85,8 @@ amd_common_files = files(
   'ac_shader_args.h',
   'ac_shader_util.c',
   'ac_shader_util.h',
+  'ac_guardband.c',
+  'ac_guardband.h',
   'ac_gather_context_rolls.c',
   'ac_gpu_info.c',
   'ac_gpu_info.h',
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 5afae02805c..b4b1644837d 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -20,6 +20,7 @@
 #include "util/log.h"
 #include "ac_cmdbuf.h"
 #include "ac_descriptors.h"
+#include "ac_guardband.h"
 #include "ac_sqtt.h"
 #include "ac_spm.h"
 #include "si_perfetto.h"
@@ -747,20 +748,12 @@ struct si_framebuffer {
    bool gfx12_has_hiz;
 };
 
-enum si_quant_mode
-{
-   /* The small prim precision computation depends on the enum values to be like this.
*/ - SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH, - SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH, - SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH, -}; - struct si_signed_scissor { int minx; int miny; int maxx; int maxy; - enum si_quant_mode quant_mode; + enum ac_quant_mode quant_mode; }; struct si_viewports { diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c index 8a485c1b34c..4cfdfa5a1d3 100644 --- a/src/gallium/drivers/radeonsi/si_state_viewport.c +++ b/src/gallium/drivers/radeonsi/si_state_viewport.c @@ -247,8 +247,7 @@ static void si_emit_guardband(struct si_context *sctx, unsigned index) { const struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct si_signed_scissor vp_as_scissor; - struct pipe_viewport_state vp; - float left, top, right, bottom, max_range, guardband_x, guardband_y; + struct ac_guardband guardband; if (sctx->vs_writes_viewport_index) { /* Shaders can draw to any viewport. Make a union of all @@ -266,96 +265,20 @@ static void si_emit_guardband(struct si_context *sctx, unsigned index) * how large the viewport is. Assume the worst case. */ if (sctx->vs_disables_clipping_viewport) - vp_as_scissor.quant_mode = SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH; + vp_as_scissor.quant_mode = AC_QUANT_MODE_16_8_FIXED_POINT_1_256TH; - /* Determine the optimal hardware screen offset to center the viewport - * within the viewport range in order to maximize the guardband size. - */ - int hw_screen_offset_x = (vp_as_scissor.maxx + vp_as_scissor.minx) / 2; - int hw_screen_offset_y = (vp_as_scissor.maxy + vp_as_scissor.miny) / 2; - - /* GFX6-GFX7 need to align the offset to an ubertile consisting of all SEs. */ - const unsigned hw_screen_offset_alignment = - sctx->gfx_level >= GFX11 ? 32 : - sctx->gfx_level >= GFX8 ? 16 : MAX2(sctx->screen->info.se_tile_repeat, 16); - const unsigned max_hw_screen_offset = sctx->gfx_level >= GFX12 ? 
32768 : 8176; - - /* Indexed by quantization modes */ - static int max_viewport_size[] = {65536, 16384, 4096}; - - /* Ensure that the whole viewport stays representable in - * absolute coordinates. - * See comment in si_set_viewport_states. - */ - assert(vp_as_scissor.maxx <= max_viewport_size[vp_as_scissor.quant_mode] && - vp_as_scissor.maxy <= max_viewport_size[vp_as_scissor.quant_mode]); - - hw_screen_offset_x = CLAMP(hw_screen_offset_x, 0, max_hw_screen_offset); - hw_screen_offset_y = CLAMP(hw_screen_offset_y, 0, max_hw_screen_offset); - - /* Align the screen offset by dropping the low bits. */ - hw_screen_offset_x &= ~(hw_screen_offset_alignment - 1); - hw_screen_offset_y &= ~(hw_screen_offset_alignment - 1); - - /* Apply the offset to center the viewport and maximize the guardband. */ - vp_as_scissor.minx -= hw_screen_offset_x; - vp_as_scissor.maxx -= hw_screen_offset_x; - vp_as_scissor.miny -= hw_screen_offset_y; - vp_as_scissor.maxy -= hw_screen_offset_y; - - /* Reconstruct the viewport transformation from the scissor. */ - vp.translate[0] = (vp_as_scissor.minx + vp_as_scissor.maxx) / 2.0; - vp.translate[1] = (vp_as_scissor.miny + vp_as_scissor.maxy) / 2.0; - vp.scale[0] = vp_as_scissor.maxx - vp.translate[0]; - vp.scale[1] = vp_as_scissor.maxy - vp.translate[1]; - - /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */ - if (vp_as_scissor.minx == vp_as_scissor.maxx) - vp.scale[0] = 0.5; - if (vp_as_scissor.miny == vp_as_scissor.maxy) - vp.scale[1] = 0.5; - - /* Find the biggest guard band that is inside the supported viewport - * range. The guard band is specified as a horizontal and vertical - * distance from (0,0) in clip space. - * - * This is done by applying the inverse viewport transformation - * on the viewport limits to get those limits in clip space. - * - * The viewport range is [-max_viewport_size/2 - 1, max_viewport_size/2]. - * (-1 to the min coord because max_viewport_size is odd and ViewportBounds - * Min/Max are -32768, 32767). 
- */ - assert(vp_as_scissor.quant_mode < ARRAY_SIZE(max_viewport_size)); - max_range = max_viewport_size[vp_as_scissor.quant_mode] / 2; - left = (-max_range - 1 - vp.translate[0]) / vp.scale[0]; - right = (max_range - vp.translate[0]) / vp.scale[0]; - top = (-max_range - 1 - vp.translate[1]) / vp.scale[1]; - bottom = (max_range - vp.translate[1]) / vp.scale[1]; - - assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1); - - guardband_x = MIN2(-left, right); - guardband_y = MIN2(-top, bottom); - - float discard_x = 1.0; - float discard_y = 1.0; - float distance = sctx->current_clip_discard_distance; - - /* Add half the point size / line width */ - discard_x += distance / (2.0 * vp.scale[0]); - discard_y += distance / (2.0 * vp.scale[1]); - - /* Discard primitives that would lie entirely outside the viewport area. */ - discard_x = MIN2(discard_x, guardband_x); - discard_y = MIN2(discard_y, guardband_y); + ac_compute_guardband(&sctx->screen->info, vp_as_scissor.minx, vp_as_scissor.miny, + vp_as_scissor.maxx, vp_as_scissor.maxy, + vp_as_scissor.quant_mode, sctx->current_clip_discard_distance, + &guardband); unsigned pa_su_vtx_cntl = S_028BE4_PIX_CENTER(rs->half_pixel_center) | S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) | S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH + vp_as_scissor.quant_mode); - unsigned pa_su_hardware_screen_offset = S_028234_HW_SCREEN_OFFSET_X(hw_screen_offset_x >> 4) | - S_028234_HW_SCREEN_OFFSET_Y(hw_screen_offset_y >> 4); + unsigned pa_su_hardware_screen_offset = + S_028234_HW_SCREEN_OFFSET_X(guardband.hw_screen_offset_x >> 4) | + S_028234_HW_SCREEN_OFFSET_Y(guardband.hw_screen_offset_y >> 4); /* If any of the GB registers is updated, all of them must be updated. 
* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, R_028BEC_PA_CL_GB_VERT_DISC_ADJ @@ -368,8 +291,8 @@ static void si_emit_guardband(struct si_context *sctx, unsigned index) pa_su_vtx_cntl); gfx12_opt_set_context_reg4(R_02842C_PA_CL_GB_VERT_CLIP_ADJ, AC_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, - fui(guardband_y), fui(discard_y), - fui(guardband_x), fui(discard_x)); + fui(guardband.clip_y), fui(guardband.discard_y), + fui(guardband.clip_x), fui(guardband.discard_x)); gfx12_opt_set_context_reg(R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, AC_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET, pa_su_hardware_screen_offset); @@ -382,8 +305,8 @@ static void si_emit_guardband(struct si_context *sctx, unsigned index) pa_su_vtx_cntl); gfx11_opt_set_context_reg4(R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, AC_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, - fui(guardband_y), fui(discard_y), - fui(guardband_x), fui(discard_x)); + fui(guardband.clip_y), fui(guardband.discard_y), + fui(guardband.clip_x), fui(guardband.discard_x)); gfx11_opt_set_context_reg(R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, AC_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET, pa_su_hardware_screen_offset); @@ -393,8 +316,8 @@ static void si_emit_guardband(struct si_context *sctx, unsigned index) radeon_begin(&sctx->gfx_cs); radeon_opt_set_context_reg5(R_028BE4_PA_SU_VTX_CNTL, AC_TRACKED_PA_SU_VTX_CNTL, pa_su_vtx_cntl, - fui(guardband_y), fui(discard_y), - fui(guardband_x), fui(discard_x)); + fui(guardband.clip_y), fui(guardband.discard_y), + fui(guardband.clip_x), fui(guardband.discard_x)); radeon_opt_set_context_reg(R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, AC_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET, pa_su_hardware_screen_offset); @@ -477,11 +400,11 @@ static void si_set_viewport_states(struct pipe_context *pctx, unsigned start_slo */ if (max_corner <= 1024) /* 4K scanline area for guardband */ - scissor->quant_mode = SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH; + scissor->quant_mode = AC_QUANT_MODE_12_12_FIXED_POINT_1_4096TH; else if (max_corner <= 4096) /* 16K scanline area for guardband */ - 
scissor->quant_mode = SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH; + scissor->quant_mode = AC_QUANT_MODE_14_10_FIXED_POINT_1_1024TH; else /* 64K scanline area for guardband */ - scissor->quant_mode = SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH; + scissor->quant_mode = AC_QUANT_MODE_16_8_FIXED_POINT_1_256TH; } if (start_slot == 0) { @@ -786,5 +709,5 @@ void si_init_viewport_functions(struct si_context *ctx) ctx->b.set_window_rectangles = si_set_window_rectangles; for (unsigned i = 0; i < 16; i++) - ctx->viewports.as_scissor[i].quant_mode = SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH; + ctx->viewports.as_scissor[i].quant_mode = AC_QUANT_MODE_16_8_FIXED_POINT_1_256TH; }