Skip to content

Commit

Permalink
lgc: Optimally insert return block during cps lowering
Browse files Browse the repository at this point in the history
If the original function does not have a `return` instruction, we don't
need the runtime check to exit, so there is no need to insert a return block.

A handy command to fix up the test checks:
sed -i 's/llvm.amdgcn.set.inactive.i32/llvm.amdgcn.set.inactive.{{(chain.arg.)?}}i32/g' lgc/test/Transforms/CpsLowering/*
  • Loading branch information
ruiling committed Oct 12, 2023
1 parent 3a2a469 commit 7a995d4
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 68 deletions.
22 changes: 14 additions & 8 deletions lgc/patch/PatchEntryPointMutate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -402,8 +402,6 @@ bool PatchEntryPointMutate::lowerCpsOps(Function *func, ShaderInputs *shaderInpu
// ret void
unsigned waveSize = m_pipelineState->getShaderWaveSize(m_shaderStage);
Type *waveMaskTy = builder.getIntNTy(waveSize);
auto *chainBlock = BasicBlock::Create(func->getContext(), "chain.block", func);
auto *retBlock = BasicBlock::Create(func->getContext(), "ret.block", func);
// For continufy based continuation, the vgpr list: LocalInvocationId, vcr, vsp, ...
unsigned vcrIndexInVgpr = shaderInputs ? 1 : 0;
auto *vcr = builder.CreateExtractValue(vgprArg, vcrIndexInVgpr);
Expand Down Expand Up @@ -437,13 +435,21 @@ bool PatchEntryPointMutate::lowerCpsOps(Function *func, ShaderInputs *shaderInpu
execMask = builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_wwm, execMask);
}

auto *isNullTarget = builder.CreateICmpEQ(targetVcr, builder.getInt32(0));
builder.CreateCondBr(isNullTarget, retBlock, chainBlock);

builder.SetInsertPoint(retBlock);
builder.CreateRetVoid();
BasicBlock *chainBlock = nullptr;
// We only need to insert the return block if the original function contains a return instruction; otherwise we
// just insert everything in the tail block.
if (!retInstrs.empty()) {
chainBlock = BasicBlock::Create(func->getContext(), "chain.block", func);
auto *retBlock = BasicBlock::Create(func->getContext(), "ret.block", func);
auto *isNullTarget = builder.CreateICmpEQ(targetVcr, builder.getInt32(0));
builder.CreateCondBr(isNullTarget, retBlock, chainBlock);

builder.SetInsertPoint(retBlock);
builder.CreateRetVoid();
}

builder.SetInsertPoint(chainBlock);
if (chainBlock)
builder.SetInsertPoint(chainBlock);
// Mask off metadata bits and setup jump target.
Value *addr32 = builder.CreateAnd(targetVcr, builder.getInt32(~0x3fu));
AddressExtender addressExtender(func);
Expand Down
15 changes: 5 additions & 10 deletions lgc/test/Transforms/CpsLowering/continuation-basic.lgc
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,10 @@ entry:
; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP40]])
; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP39]])
; CHECK-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP41]])
; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP42]], 0
; CHECK-NEXT: br i1 [[TMP44]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]]
; CHECK: chain.block:
; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP42]], -64
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP45]], i64 0
; CHECK-NEXT: [[TMP47:%.*]] = bitcast <2 x i32> [[TMP46]] to i64
; CHECK-NEXT: [[TMP48:%.*]] = inttoptr i64 [[TMP47]] to ptr
; CHECK-NEXT: call void (ptr, i32, <16 x i32>, { i32, ptr addrspace(5), i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v16i32.sl_i32p5i32s(ptr inreg [[TMP48]], i32 inreg [[TMP43]], <16 x i32> inreg [[TMP33]], { i32, ptr addrspace(5), i32 } [[TMP13]], i32 0)
; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP42]], -64
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP44]], i64 0
; CHECK-NEXT: [[TMP46:%.*]] = bitcast <2 x i32> [[TMP45]] to i64
; CHECK-NEXT: [[TMP47:%.*]] = inttoptr i64 [[TMP46]] to ptr
; CHECK-NEXT: call void (ptr, i32, <16 x i32>, { i32, ptr addrspace(5), i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v16i32.sl_i32p5i32s(ptr inreg [[TMP47]], i32 inreg [[TMP43]], <16 x i32> inreg [[TMP33]], { i32, ptr addrspace(5), i32 } [[TMP13]], i32 0)
; CHECK-NEXT: unreachable
; CHECK: ret.block:
; CHECK-NEXT: ret void
;
15 changes: 5 additions & 10 deletions lgc/test/Transforms/CpsLowering/cps-entry-point.lgc
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,10 @@ attributes #5 = { nounwind willreturn memory(none) }
; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP41]], i32 [[TMP44]])
; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i32 [[TMP41]], [[TMP45]]
; CHECK-NEXT: [[TMP47:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP46]])
; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i32 [[TMP45]], 0
; CHECK-NEXT: br i1 [[TMP48]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]]
; CHECK: chain.block:
; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP45]], -64
; CHECK-NEXT: [[TMP50:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP49]], i64 0
; CHECK-NEXT: [[TMP51:%.*]] = bitcast <2 x i32> [[TMP50]] to i64
; CHECK-NEXT: [[TMP52:%.*]] = inttoptr i64 [[TMP51]] to ptr
; CHECK-NEXT: call void (ptr, i32, <16 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v16i32.sl_i32p5i32i32s(ptr inreg [[TMP52]], i32 inreg [[TMP47]], <16 x i32> inreg [[TMP40]], { i32, ptr addrspace(5), i32, i32 } [[TMP20]], i32 0)
; CHECK-NEXT: [[TMP48:%.*]] = and i32 [[TMP45]], -64
; CHECK-NEXT: [[TMP49:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP48]], i64 0
; CHECK-NEXT: [[TMP50:%.*]] = bitcast <2 x i32> [[TMP49]] to i64
; CHECK-NEXT: [[TMP51:%.*]] = inttoptr i64 [[TMP50]] to ptr
; CHECK-NEXT: call void (ptr, i32, <16 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v16i32.sl_i32p5i32i32s(ptr inreg [[TMP51]], i32 inreg [[TMP47]], <16 x i32> inreg [[TMP40]], { i32, ptr addrspace(5), i32, i32 } [[TMP20]], i32 0)
; CHECK-NEXT: unreachable
; CHECK: ret.block:
; CHECK-NEXT: ret void
;
15 changes: 5 additions & 10 deletions lgc/test/Transforms/CpsLowering/cps-from-continufy.lgc
Original file line number Diff line number Diff line change
Expand Up @@ -253,17 +253,12 @@ attributes #7 = { nounwind willreturn memory(inaccessiblemem: read) }
; CHECK-NEXT: [[TMP100:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP99]])
; CHECK-NEXT: [[TMP101:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP98]])
; CHECK-NEXT: [[TMP102:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP100]])
; CHECK-NEXT: [[TMP103:%.*]] = icmp eq i32 [[TMP101]], 0
; CHECK-NEXT: br i1 [[TMP103]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]]
; CHECK: chain.block:
; CHECK-NEXT: [[TMP104:%.*]] = and i32 [[TMP101]], -64
; CHECK-NEXT: [[TMP105:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP104]], i64 0
; CHECK-NEXT: [[TMP106:%.*]] = bitcast <2 x i32> [[TMP105]] to i64
; CHECK-NEXT: [[TMP107:%.*]] = inttoptr i64 [[TMP106]] to ptr
; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5i32i32i32s(ptr inreg [[TMP107]], i32 inreg [[TMP102]], <20 x i32> inreg [[TMP92]], { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP65]], i32 0)
; CHECK-NEXT: [[TMP103:%.*]] = and i32 [[TMP101]], -64
; CHECK-NEXT: [[TMP104:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP103]], i64 0
; CHECK-NEXT: [[TMP105:%.*]] = bitcast <2 x i32> [[TMP104]] to i64
; CHECK-NEXT: [[TMP106:%.*]] = inttoptr i64 [[TMP105]] to ptr
; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32p5i32i32i32s(ptr inreg [[TMP106]], i32 inreg [[TMP102]], <20 x i32> inreg [[TMP92]], { <3 x i32>, i32, ptr addrspace(5), i32, i32, i32 } [[TMP65]], i32 0)
; CHECK-NEXT: unreachable
; CHECK: ret.block:
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@_rgen_1.resume.0
Expand Down
30 changes: 10 additions & 20 deletions lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc
Original file line number Diff line number Diff line change
Expand Up @@ -109,17 +109,12 @@ define void @test.2({ ptr addrspace(32) } %state) !lgc.cps !{i32 1} !lgc.shaders
; CHECK-NEXT: [[TMP48:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP47]])
; CHECK-NEXT: [[TMP49:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP46]])
; CHECK-NEXT: [[TMP50:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP48]])
; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i32 [[TMP49]], 0
; CHECK-NEXT: br i1 [[TMP51]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]]
; CHECK: chain.block:
; CHECK-NEXT: [[TMP52:%.*]] = and i32 [[TMP49]], -64
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP52]], i64 0
; CHECK-NEXT: [[TMP54:%.*]] = bitcast <2 x i32> [[TMP53]] to i64
; CHECK-NEXT: [[TMP55:%.*]] = inttoptr i64 [[TMP54]] to ptr
; CHECK-NEXT: call void (ptr, i32, <16 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v16i32.sl_i32p5i32i32s(ptr inreg [[TMP55]], i32 inreg [[TMP50]], <16 x i32> inreg [[TMP40]], { i32, ptr addrspace(5), i32, i32 } [[TMP20]], i32 0)
; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP49]], -64
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP51]], i64 0
; CHECK-NEXT: [[TMP53:%.*]] = bitcast <2 x i32> [[TMP52]] to i64
; CHECK-NEXT: [[TMP54:%.*]] = inttoptr i64 [[TMP53]] to ptr
; CHECK-NEXT: call void (ptr, i32, <16 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v16i32.sl_i32p5i32i32s(ptr inreg [[TMP54]], i32 inreg [[TMP50]], <16 x i32> inreg [[TMP40]], { i32, ptr addrspace(5), i32, i32 } [[TMP20]], i32 0)
; CHECK-NEXT: unreachable
; CHECK: ret.block:
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@test.1
Expand Down Expand Up @@ -172,17 +167,12 @@ define void @test.2({ ptr addrspace(32) } %state) !lgc.cps !{i32 1} !lgc.shaders
; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP40]])
; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP39]])
; CHECK-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP41]])
; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP42]], 0
; CHECK-NEXT: br i1 [[TMP44]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]]
; CHECK: chain.block:
; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP42]], -64
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP45]], i64 0
; CHECK-NEXT: [[TMP47:%.*]] = bitcast <2 x i32> [[TMP46]] to i64
; CHECK-NEXT: [[TMP48:%.*]] = inttoptr i64 [[TMP47]] to ptr
; CHECK-NEXT: call void (ptr, i32, <16 x i32>, { i32, ptr addrspace(5) }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v16i32.sl_i32p5s(ptr inreg [[TMP48]], i32 inreg [[TMP43]], <16 x i32> inreg [[TMP33]], { i32, ptr addrspace(5) } [[TMP13]], i32 0)
; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP42]], -64
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP44]], i64 0
; CHECK-NEXT: [[TMP46:%.*]] = bitcast <2 x i32> [[TMP45]] to i64
; CHECK-NEXT: [[TMP47:%.*]] = inttoptr i64 [[TMP46]] to ptr
; CHECK-NEXT: call void (ptr, i32, <16 x i32>, { i32, ptr addrspace(5) }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v16i32.sl_i32p5s(ptr inreg [[TMP47]], i32 inreg [[TMP43]], <16 x i32> inreg [[TMP33]], { i32, ptr addrspace(5) } [[TMP13]], i32 0)
; CHECK-NEXT: unreachable
; CHECK: ret.block:
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@test.2
Expand Down
15 changes: 5 additions & 10 deletions lgc/test/Transforms/CpsLowering/cps-unify-exits.lgc
Original file line number Diff line number Diff line change
Expand Up @@ -119,17 +119,12 @@ else:
; CHECK-NEXT: [[TMP48:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP47]])
; CHECK-NEXT: [[TMP49:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP46]])
; CHECK-NEXT: [[TMP50:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP48]])
; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i32 [[TMP49]], 0
; CHECK-NEXT: br i1 [[TMP51]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]]
; CHECK: chain.block:
; CHECK-NEXT: [[TMP52:%.*]] = and i32 [[TMP49]], -64
; CHECK-NEXT: [[TMP53:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP52]], i64 0
; CHECK-NEXT: [[TMP54:%.*]] = bitcast <2 x i32> [[TMP53]] to i64
; CHECK-NEXT: [[TMP55:%.*]] = inttoptr i64 [[TMP54]] to ptr
; CHECK-NEXT: call void (ptr, i32, <16 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v16i32.sl_i32p5i32i32s(ptr inreg [[TMP55]], i32 inreg [[TMP50]], <16 x i32> inreg [[TMP40]], { i32, ptr addrspace(5), i32, i32 } [[TMP20]], i32 0)
; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP49]], -64
; CHECK-NEXT: [[TMP52:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP51]], i64 0
; CHECK-NEXT: [[TMP53:%.*]] = bitcast <2 x i32> [[TMP52]] to i64
; CHECK-NEXT: [[TMP54:%.*]] = inttoptr i64 [[TMP53]] to ptr
; CHECK-NEXT: call void (ptr, i32, <16 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v16i32.sl_i32p5i32i32s(ptr inreg [[TMP54]], i32 inreg [[TMP50]], <16 x i32> inreg [[TMP40]], { i32, ptr addrspace(5), i32, i32 } [[TMP20]], i32 0)
; CHECK-NEXT: unreachable
; CHECK: ret.block:
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@unify_jump_ret
Expand Down

0 comments on commit 7a995d4

Please sign in to comment.