-
Notifications
You must be signed in to change notification settings - Fork 54
/
Copy path0032-RISCV-Reserve-an-emergency-spill-slot-for-the-regist.patch
190 lines (183 loc) · 8.29 KB
/
0032-RISCV-Reserve-an-emergency-spill-slot-for-the-regist.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Subject: [RISCV] Reserve an emergency spill slot for the register scavenger
when necessary
---
lib/Target/RISCV/RISCVFrameLowering.cpp | 19 +++++++
lib/Target/RISCV/RISCVFrameLowering.h | 3 +
test/CodeGen/RISCV/large-stack.ll | 99 ++++++++++++++++++++++++++++++---
3 files changed, 114 insertions(+), 7 deletions(-)
diff --git a/lib/Target/RISCV/RISCVFrameLowering.cpp b/lib/Target/RISCV/RISCVFrameLowering.cpp
index 85a354d0c12..32ed896bc98 100644
--- a/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
using namespace llvm;
@@ -224,3 +225,21 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(RISCV::X1);
SavedRegs.set(RISCV::X8);
}
+
+void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF, RegScavenger *RS) const {
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterClass *RC = &RISCV::GPRRegClass;
+ // estimateStackSize has been observed to under-estimate the final stack
+ // size, so give ourselves wiggle-room by checking for stack size
+ // representable an 11-bit signed field rather than 12-bits.
+ // FIXME: It may be possible to craft a function with a small stack that
+ // still needs an emergency spill slot for branch relaxation. This case
+ // would currently be missed.
+ if (!isInt<11>(MFI.estimateStackSize(MF))) {
+ int RegScavFI = MFI.CreateStackObject(
+ RegInfo->getSpillSize(*RC), RegInfo->getSpillAlignment(*RC), false);
+ RS->addScavengingFrameIndex(RegScavFI);
+ }
+}
diff --git a/lib/Target/RISCV/RISCVFrameLowering.h b/lib/Target/RISCV/RISCVFrameLowering.h
index d92bb70c76d..ccf7e247b55 100644
--- a/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/lib/Target/RISCV/RISCVFrameLowering.h
@@ -36,6 +36,9 @@ public:
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const override;
+
bool hasFP(const MachineFunction &MF) const override;
MachineBasicBlock::iterator
diff --git a/test/CodeGen/RISCV/large-stack.ll b/test/CodeGen/RISCV/large-stack.ll
index cecca220e19..d3b429022a4 100644
--- a/test/CodeGen/RISCV/large-stack.ll
+++ b/test/CodeGen/RISCV/large-stack.ll
@@ -8,31 +8,116 @@ define void @test() nounwind {
; RV32I-LABEL: test:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a0, 74565
-; RV32I-NEXT: addi a0, a0, 1664
+; RV32I-NEXT: addi a0, a0, 1680
; RV32I-NEXT: sub sp, sp, a0
; RV32I-NEXT: lui a0, 74565
-; RV32I-NEXT: addi a0, a0, 1660
+; RV32I-NEXT: addi a0, a0, 1676
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: sw ra, 0(a0)
; RV32I-NEXT: lui a0, 74565
-; RV32I-NEXT: addi a0, a0, 1656
+; RV32I-NEXT: addi a0, a0, 1672
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: sw s0, 0(a0)
; RV32I-NEXT: lui a0, 74565
-; RV32I-NEXT: addi a0, a0, 1664
+; RV32I-NEXT: addi a0, a0, 1680
; RV32I-NEXT: add s0, sp, a0
; RV32I-NEXT: lui a0, 74565
-; RV32I-NEXT: addi a0, a0, 1656
+; RV32I-NEXT: addi a0, a0, 1672
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: lw s0, 0(a0)
; RV32I-NEXT: lui a0, 74565
-; RV32I-NEXT: addi a0, a0, 1660
+; RV32I-NEXT: addi a0, a0, 1676
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: lw ra, 0(a0)
; RV32I-NEXT: lui a0, 74565
-; RV32I-NEXT: addi a0, a0, 1664
+; RV32I-NEXT: addi a0, a0, 1680
; RV32I-NEXT: add sp, sp, a0
; RV32I-NEXT: jalr zero, ra, 0
%tmp = alloca [ 305419896 x i8 ] , align 4
ret void
}
+
+; This test case artificially produces register pressure which should force
+; use of the emergency spill slot.
+
+define void @test_emergency_spill_slot(i32 %a) nounwind {
+; RV32I-LABEL: test_emergency_spill_slot:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a1, 98
+; RV32I-NEXT: addi a1, a1, -1376
+; RV32I-NEXT: sub sp, sp, a1
+; RV32I-NEXT: lui a1, 98
+; RV32I-NEXT: addi a1, a1, -1380
+; RV32I-NEXT: add a1, sp, a1
+; RV32I-NEXT: sw ra, 0(a1)
+; RV32I-NEXT: lui a1, 98
+; RV32I-NEXT: addi a1, a1, -1384
+; RV32I-NEXT: add a1, sp, a1
+; RV32I-NEXT: sw s0, 0(a1)
+; RV32I-NEXT: lui a1, 98
+; RV32I-NEXT: addi a1, a1, -1388
+; RV32I-NEXT: add a1, sp, a1
+; RV32I-NEXT: sw s1, 0(a1)
+; RV32I-NEXT: lui a1, 98
+; RV32I-NEXT: addi a1, a1, -1392
+; RV32I-NEXT: add a1, sp, a1
+; RV32I-NEXT: sw s2, 0(a1)
+; RV32I-NEXT: lui a1, 98
+; RV32I-NEXT: addi a1, a1, -1376
+; RV32I-NEXT: add s0, sp, a1
+; RV32I-NEXT: lui a1, 78
+; RV32I-NEXT: addi a1, a1, 512
+; RV32I-NEXT: lui a2, 1048478
+; RV32I-NEXT: addi a2, a2, 1388
+; RV32I-NEXT: add a2, s0, a2
+; RV32I-NEXT: addi a2, a2, 0
+; RV32I-NEXT: add a1, a2, a1
+; RV32I-NEXT: #APP
+; RV32I-NEXT: nop
+; RV32I-NEXT: #NO_APP
+; RV32I-NEXT: sw a0, 0(a1)
+; RV32I-NEXT: #APP
+; RV32I-NEXT: nop
+; RV32I-NEXT: #NO_APP
+; RV32I-NEXT: lui a0, 98
+; RV32I-NEXT: addi a0, a0, -1392
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: lw s2, 0(a0)
+; RV32I-NEXT: lui a0, 98
+; RV32I-NEXT: addi a0, a0, -1388
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: lw s1, 0(a0)
+; RV32I-NEXT: lui a0, 98
+; RV32I-NEXT: addi a0, a0, -1384
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: lw s0, 0(a0)
+; RV32I-NEXT: lui a0, 98
+; RV32I-NEXT: addi a0, a0, -1380
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: lw ra, 0(a0)
+; RV32I-NEXT: lui a0, 98
+; RV32I-NEXT: addi a0, a0, -1376
+; RV32I-NEXT: add sp, sp, a0
+; RV32I-NEXT: jalr zero, ra, 0
+ %data = alloca [ 100000 x i32 ] , align 4
+ %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %data, i32 0, i32 80000
+ %1 = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm sideeffect "nop", "=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r"()
+ %asmresult0 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 0
+ %asmresult1 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 1
+ %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 2
+ %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 3
+ %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 4
+ %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 5
+ %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 6
+ %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 7
+ %asmresult8 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 8
+ %asmresult9 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 9
+ %asmresult10 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 10
+ %asmresult11 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 11
+ %asmresult12 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 12
+ %asmresult13 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 13
+ %asmresult14 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 14
+ store volatile i32 %a, i32* %ptr
+ tail call void asm sideeffect "nop", "r,r,r,r,r,r,r,r,r,r,r,r,r,r,r"(i32 %asmresult0, i32 %asmresult1, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7, i32 %asmresult8, i32 %asmresult9, i32 %asmresult10, i32 %asmresult11, i32 %asmresult12, i32 %asmresult13, i32 %asmresult14)
+ ret void
+}
--
2.16.2