From 6f076cdfda3dd0c86defb362210b22b7531739e1 Mon Sep 17 00:00:00 2001 From: Natalie Vock Date: Mon, 12 Jan 2026 21:14:29 +0100 Subject: [PATCH] radv: Use wave32 for RT on gfx11+ ACO got a lot better at forming VOPD instructions, and testing feedback seems to point in a slightly positive direction for this. gfx12 will also start requiring wave32 for dynamic VGPR allocation at some point. Measurements on navi31: Cyberpunk 2077: Difference at 95.0% confidence 1.12333 +/- 0.42876 1.88216% +/- 0.718391% (Student's t, pooled s = 0.189165) Black Myth Wukong benchmark: Difference at 95.0% confidence 4 +/- 1.30862 13.9535% +/- 4.56495% (Student's t, pooled s = 0.57735) Portal with RTX: 66.2ms->61.5ms (~7.64% improvement) Part-of: --- docs/envvars.rst | 4 +--- src/amd/vulkan/radv_debug.h | 11 +++++------ src/amd/vulkan/radv_instance.c | 1 - src/amd/vulkan/radv_physical_device.c | 11 ++++------- 4 files changed, 10 insertions(+), 17 deletions(-) diff --git a/docs/envvars.rst b/docs/envvars.rst index 1dfb1323d51..034fac5fe51 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -1577,10 +1577,8 @@ RADV driver environment variables disable optimizations that get enabled when all VRAM is CPU visible. ``pswave32`` enable wave32 for pixel shaders (GFX10+) - ``rtwave32`` - enable wave32 for ray tracing shaders (GFX11+) ``rtwave64`` - enable wave64 for ray tracing shaders (GFX10-10.3) + enable wave64 for ray tracing shaders (GFX10+) ``sam`` enable optimizations to move more driver internal objects to VRAM. 
``sparse`` diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index 0f64278108f..09ce9f27e84 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -95,12 +95,11 @@ enum { RADV_PERFTEST_DMA_SHADERS = 1u << 11, RADV_PERFTEST_TRANSFER_QUEUE = 1u << 12, RADV_PERFTEST_NIR_CACHE = 1u << 13, - RADV_PERFTEST_RT_WAVE_32 = 1u << 14, - RADV_PERFTEST_VIDEO_ENCODE = 1u << 15, - RADV_PERFTEST_NO_GTT_SPILL = 1u << 16, - RADV_PERFTEST_HIC = 1u << 17, - RADV_PERFTEST_SPARSE = 1u << 18, - RADV_PERFTEST_RT_CPS = 1u << 19, + RADV_PERFTEST_VIDEO_ENCODE = 1u << 14, + RADV_PERFTEST_NO_GTT_SPILL = 1u << 15, + RADV_PERFTEST_HIC = 1u << 16, + RADV_PERFTEST_SPARSE = 1u << 17, + RADV_PERFTEST_RT_CPS = 1u << 18, }; enum { diff --git a/src/amd/vulkan/radv_instance.c b/src/amd/vulkan/radv_instance.c index e83af14d7f5..60a2838096d 100644 --- a/src/amd/vulkan/radv_instance.c +++ b/src/amd/vulkan/radv_instance.c @@ -120,7 +120,6 @@ static const struct debug_control radv_perftest_options[] = { {"dmashaders", RADV_PERFTEST_DMA_SHADERS}, {"transfer_queue", RADV_PERFTEST_TRANSFER_QUEUE}, {"nircache", RADV_PERFTEST_NIR_CACHE}, - {"rtwave32", RADV_PERFTEST_RT_WAVE_32}, {"video_encode", RADV_PERFTEST_VIDEO_ENCODE}, {"nogttspill", RADV_PERFTEST_NO_GTT_SPILL}, {"hic", RADV_PERFTEST_HIC}, diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index 4c2acc60ca7..806a44e1867 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -2489,16 +2489,13 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm if (instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32) pdev->ge_wave_size = 32; - /* Default to 32 on RDNA1-2 as that gives better perf due to less issues with divergence. - * However, on RDNA3+ default to wave64 as implicit dual issuing is likely better than - * wave32 VOPD for VALU dependent code. 
- * (as well as the SALU count becoming more problematic with wave32) + /* Default to 32 on RDNA as that gives better perf due to fewer issues with divergence. + * On GFX12+, wave32 will also be required for a future dynamic VGPR allocation implementation. */ - if (instance->perftest_flags & RADV_PERFTEST_RT_WAVE_32 || pdev->info.gfx_level < GFX11) - pdev->rt_wave_size = 32; - if (radv_is_rt_wave64_enabled(instance)) pdev->rt_wave_size = 64; + else + pdev->rt_wave_size = 32; } radv_probe_video_decode(pdev);