From d30a6dcfa06196dd98fc898219eb12bab04a56de Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Fri, 8 Nov 2024 13:00:55 -0500 Subject: [PATCH] [AMDGPU] Reorganize tests to unblock #112403 (#115503) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We’re facing an issue (#113782) that is currently blocking #112403. However, since #112403 involves extensive test changes, I’d prefer to land it as soon as possible. This PR reorganizes the tests by moving test cases expected to fail into a separate file. Additionally, it changes the `[15 x i32]` arguments to `[13 x i32]` to bypass the issue. --- ...l-args-inreg-no-sgpr-for-csrspill-xfail.ll | 27 +++++ llvm/test/CodeGen/AMDGPU/call-args-inreg.ll | 104 +++++++----------- 2 files changed, 66 insertions(+), 65 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll new file mode 100644 index 00000000000000..5155a14f1f7963 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll @@ -0,0 +1,27 @@ +; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 %s -filetype=null 2>&1 | FileCheck -enable-var-scope %s + +; CHECK: LLVM ERROR: failed to find free scratch register + +declare hidden void @external_void_func_a15i32_inreg([15 x i32] inreg) #0 +declare hidden void @external_void_func_a16i32_inreg([16 x i32] inreg) #0 +declare hidden void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg, i32 inreg) #0 + +define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #0 { + call void @external_void_func_a15i32_inreg([15 x i32] inreg %arg0) + ret void +} + +define void @test_call_external_void_func_a16i32_inreg([16 x i32] inreg %arg0) #0 { + call void @external_void_func_a16i32_inreg([16 x i32] inreg %arg0) + ret void +} + +define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) #0 { + call void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) + ret void +} + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdhsa_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll index 8766303d7ee6ec..d35b5fe818bef8 100644 --- a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll @@ -28,10 +28,8 @@ declare hidden void @external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg) declare hidden void @external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg, i32 inreg, i64 inreg) #0 -declare hidden void @external_void_func_a15i32_inreg([15 x i32] inreg) #0 -declare hidden void @external_void_func_a16i32_inreg([16 x i32] inreg) #0 -declare hidden void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg, i32 inreg) #0 -declare hidden void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([15 x i32] inreg, i32 inreg) #1 +declare hidden void @external_void_func_a15i32_inreg([13 x i32] inreg) #0 +declare hidden void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([13 x i32] inreg, i32 inreg) #1 define void @test_call_external_void_func_i8_inreg(i8 inreg %arg) #0 { ; GFX9-LABEL: test_call_external_void_func_i8_inreg: @@ -534,12 +532,6 @@ define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 { ret void } -; FIXME: -; define void @test_call_external_void_func_v16i32_inreg(<16 x i32> inreg %arg) #0 { -; call void @external_void_func_v16i32_inreg(<16 x i32> inreg %arg) -; ret void -; } - define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 { ; GFX9-LABEL: test_call_external_void_func_f16_inreg: ; GFX9: ; %bb.0: @@ -1402,16 +1394,16 @@ define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inre ret void } -define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #0 { +define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #0 { ; GFX9-LABEL: test_call_external_void_func_a15i32_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s29, s33 +; GFX9-NEXT: s_mov_b32 s27, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 vcc, -1 +; GFX9-NEXT: s_or_saveexec_b64 s[28:29], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, vcc -; GFX9-NEXT: v_writelane_b32 v40, s29, 2 +; GFX9-NEXT: s_mov_b64 exec, s[28:29] +; GFX9-NEXT: v_writelane_b32 v40, s27, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s3, s17 @@ -1427,13 +1419,11 @@ define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) # ; GFX9-NEXT: s_mov_b32 s22, s24 ; GFX9-NEXT: s_mov_b32 s23, s25 ; GFX9-NEXT: s_mov_b32 s24, s26 -; GFX9-NEXT: s_mov_b32 s25, s27 -; GFX9-NEXT: s_mov_b32 s26, s28 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 vcc -; GFX9-NEXT: s_add_u32 vcc_lo, vcc_lo, external_void_func_a15i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 vcc_hi, vcc_hi, external_void_func_a15i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], vcc +; GFX9-NEXT: s_getpc_b64 s[28:29] +; GFX9-NEXT: s_add_u32 s28, s28, external_void_func_a15i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s29, s29, external_void_func_a15i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[28:29] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 @@ -1448,19 +1438,17 @@ define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) # ; GFX11-LABEL: test_call_external_void_func_a15i32_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s25, s33 +; GFX11-NEXT: s_mov_b32 s23, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 -; GFX11-NEXT: s_or_saveexec_b32 s26, -1 +; GFX11-NEXT: s_or_saveexec_b32 s24, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s26 -; GFX11-NEXT: v_writelane_b32 v40, s25, 2 -; GFX11-NEXT: s_mov_b32 s26, s24 -; GFX11-NEXT: s_mov_b32 s25, s23 +; GFX11-NEXT: s_mov_b32 exec_lo, s24 +; GFX11-NEXT: v_writelane_b32 v40, s23, 2 ; GFX11-NEXT: s_mov_b32 s24, s22 ; GFX11-NEXT: s_mov_b32 s23, s21 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s22, s20 ; GFX11-NEXT: s_mov_b32 s21, s19 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s20, s18 ; GFX11-NEXT: s_mov_b32 s19, s17 ; GFX11-NEXT: s_mov_b32 s18, s16 @@ -1468,11 +1456,11 @@ define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) # ; GFX11-NEXT: s_mov_b32 s16, s6 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-NEXT: s_getpc_b64 s[28:29] -; GFX11-NEXT: s_add_u32 s28, s28, external_void_func_a15i32_inreg@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s29, s29, external_void_func_a15i32_inreg@rel32@hi+12 +; GFX11-NEXT: s_getpc_b64 s[26:27] +; GFX11-NEXT: s_add_u32 s26, s26, external_void_func_a15i32_inreg@rel32@lo+4 +; GFX11-NEXT: s_addc_u32 s27, s27, external_void_func_a15i32_inreg@rel32@hi+12 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_swappc_b64 s[30:31], s[28:29] +; GFX11-NEXT: s_swappc_b64 s[30:31], s[26:27] ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 @@ -1483,34 +1471,22 @@ define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) # ; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - call void @external_void_func_a15i32_inreg([15 x i32] inreg %arg0) + call void @external_void_func_a15i32_inreg([13 x i32] inreg %arg0) ret void } -; FIXME: -; define void @test_call_external_void_func_a16i32_inreg([16 x i32] inreg %arg0) #0 { -; call void @external_void_func_a16i32_inreg([16 x i32] inreg %arg0) -; ret void -; } - -; FIXME: -; define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) #0 { -; call void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) -; ret void -; } - ; FIXME: This should also fail -define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) #1 { +define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inreg %arg0, i32 inreg %arg1) #1 { ; GFX9-LABEL: test_call_external_void_func_a15i32_inreg_i32_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s23, s33 +; GFX9-NEXT: s_mov_b32 s21, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[24:25], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[22:23], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[24:25] -; GFX9-NEXT: v_writelane_b32 v40, s23, 2 +; GFX9-NEXT: s_mov_b64 exec, s[22:23] +; GFX9-NEXT: v_writelane_b32 v40, s21, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s3, s7 @@ -1527,13 +1503,11 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inre ; GFX9-NEXT: s_mov_b32 s11, s18 ; GFX9-NEXT: s_mov_b32 s15, s19 ; GFX9-NEXT: s_mov_b32 s16, s20 -; GFX9-NEXT: s_mov_b32 s17, s21 -; GFX9-NEXT: s_mov_b32 s18, s22 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[24:25] -; GFX9-NEXT: s_add_u32 s24, s24, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s25, s25, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[24:25] +; GFX9-NEXT: s_getpc_b64 s[22:23] +; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[22:23] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 @@ -1548,19 +1522,19 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inre ; GFX11-LABEL: test_call_external_void_func_a15i32_inreg_i32_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s19, s33 +; GFX11-NEXT: s_mov_b32 s17, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 -; GFX11-NEXT: s_or_saveexec_b32 s20, -1 +; GFX11-NEXT: s_or_saveexec_b32 s18, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s20 -; GFX11-NEXT: v_writelane_b32 v40, s19, 2 +; GFX11-NEXT: s_mov_b32 exec_lo, s18 +; GFX11-NEXT: v_writelane_b32 v40, s17, 2 ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_getpc_b64 s[20:21] -; GFX11-NEXT: s_add_u32 s20, s20, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4 -; GFX11-NEXT: s_addc_u32 s21, s21, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12 +; GFX11-NEXT: s_getpc_b64 s[18:19] +; GFX11-NEXT: s_add_u32 s18, s18, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4 +; GFX11-NEXT: s_addc_u32 s19, s19, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-NEXT: s_swappc_b64 s[30:31], s[20:21] +; GFX11-NEXT: s_swappc_b64 s[30:31], s[18:19] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 @@ -1572,7 +1546,7 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inre ; GFX11-NEXT: s_mov_b32 s33, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - call void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([15 x i32] inreg %arg0, i32 inreg %arg1) + call void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([13 x i32] inreg %arg0, i32 inreg %arg1) ret void }