radv: Use wave32 for RT on gfx11+
ACO has become much better at forming VOPD instructions, and testing
feedback seems to point to a slight performance gain from using wave32 for RT.
gfx12 will also start requiring wave32 for dynamic VGPR allocation at
some point.
Measurements on navi31:
Cyberpunk 2077:
Difference at 95.0% confidence
1.12333 +/- 0.42876
1.88216% +/- 0.718391%
(Student's t, pooled s = 0.189165)
Black Myth Wukong benchmark:
Difference at 95.0% confidence
4 +/- 1.30862
13.9535% +/- 4.56495%
(Student's t, pooled s = 0.57735)
Portal with RTX:
66.2ms->61.5ms (~7.64% improvement)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39275>
This commit is contained in:
parent
fcf53988c4
commit
6f076cdfda
4 changed files with 10 additions and 17 deletions
|
|
@ -1577,10 +1577,8 @@ RADV driver environment variables
|
|||
disable optimizations that get enabled when all VRAM is CPU visible.
|
||||
``pswave32``
|
||||
enable wave32 for pixel shaders (GFX10+)
|
||||
``rtwave32``
|
||||
enable wave32 for ray tracing shaders (GFX11+)
|
||||
``rtwave64``
|
||||
enable wave64 for ray tracing shaders (GFX10-10.3)
|
||||
enable wave64 for ray tracing shaders (GFX10+)
|
||||
``sam``
|
||||
enable optimizations to move more driver internal objects to VRAM.
|
||||
``sparse``
|
||||
|
|
|
|||
|
|
@ -95,12 +95,11 @@ enum {
|
|||
RADV_PERFTEST_DMA_SHADERS = 1u << 11,
|
||||
RADV_PERFTEST_TRANSFER_QUEUE = 1u << 12,
|
||||
RADV_PERFTEST_NIR_CACHE = 1u << 13,
|
||||
RADV_PERFTEST_RT_WAVE_32 = 1u << 14,
|
||||
RADV_PERFTEST_VIDEO_ENCODE = 1u << 15,
|
||||
RADV_PERFTEST_NO_GTT_SPILL = 1u << 16,
|
||||
RADV_PERFTEST_HIC = 1u << 17,
|
||||
RADV_PERFTEST_SPARSE = 1u << 18,
|
||||
RADV_PERFTEST_RT_CPS = 1u << 19,
|
||||
RADV_PERFTEST_VIDEO_ENCODE = 1u << 14,
|
||||
RADV_PERFTEST_NO_GTT_SPILL = 1u << 15,
|
||||
RADV_PERFTEST_HIC = 1u << 16,
|
||||
RADV_PERFTEST_SPARSE = 1u << 17,
|
||||
RADV_PERFTEST_RT_CPS = 1u << 18,
|
||||
};
|
||||
|
||||
enum {
|
||||
|
|
|
|||
|
|
@ -120,7 +120,6 @@ static const struct debug_control radv_perftest_options[] = {
|
|||
{"dmashaders", RADV_PERFTEST_DMA_SHADERS},
|
||||
{"transfer_queue", RADV_PERFTEST_TRANSFER_QUEUE},
|
||||
{"nircache", RADV_PERFTEST_NIR_CACHE},
|
||||
{"rtwave32", RADV_PERFTEST_RT_WAVE_32},
|
||||
{"video_encode", RADV_PERFTEST_VIDEO_ENCODE},
|
||||
{"nogttspill", RADV_PERFTEST_NO_GTT_SPILL},
|
||||
{"hic", RADV_PERFTEST_HIC},
|
||||
|
|
|
|||
|
|
@ -2489,16 +2489,13 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
|
|||
if (instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
|
||||
pdev->ge_wave_size = 32;
|
||||
|
||||
/* Default to 32 on RDNA1-2 as that gives better perf due to less issues with divergence.
|
||||
* However, on RDNA3+ default to wave64 as implicit dual issuing is likely better than
|
||||
* wave32 VOPD for VALU dependent code.
|
||||
* (as well as the SALU count becoming more problematic with wave32)
|
||||
/* Default to 32 on RDNA as that gives better perf due to less issues with divergence.
|
||||
* On GFX12+, wave32 will also be required for a future dynamic VGPR allocation implementation.
|
||||
*/
|
||||
if (instance->perftest_flags & RADV_PERFTEST_RT_WAVE_32 || pdev->info.gfx_level < GFX11)
|
||||
pdev->rt_wave_size = 32;
|
||||
|
||||
if (radv_is_rt_wave64_enabled(instance))
|
||||
pdev->rt_wave_size = 64;
|
||||
else
|
||||
pdev->rt_wave_size = 32;
|
||||
}
|
||||
|
||||
radv_probe_video_decode(pdev);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue