-
Notifications
You must be signed in to change notification settings - Fork 6
/
bolt-gcc.bash
executable file
·153 lines (132 loc) · 5.34 KB
/
bolt-gcc.bash
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/bin/bash
## BOLT-optimize GCC's cc1 and cc1plus binaries with llvm-bolt.
##
## Usage: bolt-gcc.bash [STAGE]
##   STAGE 1  create instrumented binaries; afterwards run a workload with them
##            to collect profile data
##   STAGE 2  use llvm-bolt to optimize the binaries with the collected profile

## GCC major version. `gcc -v` shows it as the last component of the install
## path, e.g. "/usr/lib/gcc/x86_64-pc-linux-gnu/12".
GCCVER=12
## Base working directory; stage 2 writes the optimized binaries here.
TOPLEV=~/toolchain/gcc
## Directory for instrumentation data (used when bolting without perf).
DATA=${TOPLEV}/instrument
## Directory holding the GCC binaries (cc1, cc1plus) to bolt.
GCCPATH=/usr/lib/gcc/x86_64-pc-linux-gnu/${GCCVER}
## Directory containing the BOLT tools (llvm-bolt, perf2bolt, merge-fdata).
BOLTPATH=~/toolchain/llvm/stage1/install/bin
## Path to your perf.data, for CPUs that support LBR.
## Record it before running this script, for example:
##   perf record -o perf.data -e cycles:u -j any,u -- 'command to run, e.g. make'
PERFDATA=/home/foo/perf.data
## Stage to run (1 or 2, see the usage text at the top).
## Taken from the first command-line argument; hard-code a value here instead
## if you prefer editing the script.
STAGE=${1:-}
if [[ "${STAGE}" != 1 && "${STAGE}" != 2 ]]; then
  echo "warning: STAGE is '${STAGE}', expected 1 or 2; no stage will run" >&2
fi

## Quoted so that a home directory containing whitespace does not split the paths.
mkdir -p -- "${DATA}/cc1" "${DATA}/cc1plus"
## ---------------------------------------------------------------------------
## STAGE 1: build instrumented copies of cc1/cc1plus and install them in place
## of the originals, keeping the originals as <name>.org.
## ---------------------------------------------------------------------------

## Instrument one GCC binary with llvm-bolt.
## Arguments: $1 - binary name (cc1 or cc1plus)
## Globals:   GCCPATH, DATA, BOLTPATH (read)
## Outputs:   instrumented binary at ${DATA}/$1/$1
## Returns:   exit status of llvm-bolt
bolt_instrument() {
  local name=$1
  local src="${GCCPATH}/${name}"
  # On a re-run the live binary may already have been replaced by an earlier
  # stage of this script; the untouched original is then kept as <name>.org.
  if [[ -e "${src}.org" ]]; then
    src="${src}.org"
  fi
  LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/llvm-bolt" \
    --instrument \
    --instrumentation-file-append-pid \
    --instrumentation-file="${DATA}/${name}/${name}.fdata" \
    "${src}" \
    -o "${DATA}/${name}/${name}"
}

## Put the instrumented binary in place of the live one.
## Arguments: $1 - binary name (cc1 or cc1plus)
## Returns:   non-zero if any move fails
install_instrumented() {
  local name=$1
  # Never overwrite an existing backup: it is the only pristine copy.
  if [[ ! -e "${GCCPATH}/${name}.org" ]]; then
    sudo mv -- "${GCCPATH}/${name}" "${GCCPATH}/${name}.org" || return 1
  fi
  sudo mv -- "${DATA}/${name}/${name}" "${GCCPATH}/${name}"
}

## Run stage 1 for cc1 and cc1plus.
## Returns: non-zero as soon as one step fails.
run_stage1() {
  local name
  echo "Instrument GCC (cc1, cc1plus) with llvm-bolt"
  # Instrument both binaries before touching the GCC directory, so that a
  # failed llvm-bolt run leaves the installed compiler fully intact.
  for name in cc1 cc1plus; do
    if ! bolt_instrument "${name}"; then
      echo "Could not instrument ${name}" >&2
      return 1
    fi
  done
  for name in cc1 cc1plus; do
    if ! install_instrumented "${name}"; then
      echo "Could not install instrumented ${name}" >&2
      return 1
    fi
  done
  echo "Instrumented binaries installed in ${GCCPATH} (originals kept as *.org)"
  echo "now do some instrument compiles for example compiling a kernel or GCC"
}

# [[ ]] with a quoted operand stays valid when STAGE is empty.
if [[ "${STAGE}" == 1 ]]; then
  run_stage1 || exit 1
fi
## ---------------------------------------------------------------------------
## STAGE 2: optimize the original binaries (<name>.org) with llvm-bolt, using
## either a perf profile or the merged instrumentation profiles.
## ---------------------------------------------------------------------------

## llvm-bolt optimization flags shared by every run below.
BOLT_OPT_FLAGS=(
  -split-functions
  -split-all-cold
  -icf=1
  -lite=1
  -split-eh
  -use-gnu-stack
  -jump-tables=move
  -dyno-stats
  -reorder-functions=hfsort+
  -reorder-blocks=ext-tsp
  -tail-duplication=cache
)

## Convert ${PERFDATA} into a BOLT profile for one binary.
## Arguments: $1 - binary name (cc1 or cc1plus)
## Outputs:   ${DATA}/$1.fdata
## Returns:   exit status of perf2bolt
perf_to_fdata() {
  local name=$1
  # The profile must be converted against the binary it will be applied to.
  LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/perf2bolt" "${GCCPATH}/${name}.org" \
    -p "${PERFDATA}" \
    -o "${DATA}/${name}.fdata"
}

## Merge all instrumentation profiles of one binary into a single file.
## Arguments: $1 - binary name (cc1 or cc1plus)
## Outputs:   ${DATA}/$1/$1-combined.fdata
## Returns:   exit status of merge-fdata
merge_profiles() {
  local name=$1
  local combined="${DATA}/${name}/${name}-combined.fdata"
  # Drop the result of a previous run, otherwise the *.fdata glob would feed
  # the old combined profile back into the merge.
  rm -f -- "${combined}"
  "${BOLTPATH}/merge-fdata" "${DATA}/${name}"/*.fdata > "${combined}"
}

## Optimize one binary with llvm-bolt.
## Arguments: $1 - binary name (cc1 or cc1plus)
##            $2 - profile (.fdata) to apply
##            $3.. - extra llvm-bolt flags placed before BOLT_OPT_FLAGS
## Outputs:   ${TOPLEV}/$1
## Returns:   exit status of llvm-bolt
bolt_optimize() {
  local name=$1 profile=$2
  shift 2
  LD_PRELOAD=/usr/lib/libjemalloc.so "${BOLTPATH}/llvm-bolt" "${GCCPATH}/${name}.org" \
    --data "${profile}" \
    -o "${TOPLEV}/${name}" \
    "$@" \
    "${BOLT_OPT_FLAGS[@]}"
}

## Run stage 2 for cc1 and cc1plus.
## Returns: non-zero as soon as one step fails, so nothing is installed after
##          an error.
run_stage2() {
  local name
  echo "Optimize GCC (cc1, cc1plus) with llvm-bolt"
  ## Check if perf can sample branches (LBR). The probe output goes to
  ## /dev/null so it cannot clobber a perf.data in the current directory.
  if perf record -o /dev/null -e cycles:u -j any,u -- sleep 1 &>/dev/null; then
    echo "BOLTING with Profile!"
    for name in cc1 cc1plus; do
      if ! perf_to_fdata "${name}"; then
        echo "Could not convert perf-data to bolt for ${name}" >&2
        return 1
      fi
    done
    cd "${TOPLEV}" || return 1
    for name in cc1 cc1plus; do
      echo "Optimizing ${name} with the generated profile"
      if ! bolt_optimize "${name}" "${DATA}/${name}.fdata"; then
        echo "Could not optimize binary for ${name}" >&2
        return 1
      fi
    done
    # NOTE(review): as in the original script, this branch does not install
    # the result; confirm whether that is intended.
    echo "Bolted binaries are in ${TOPLEV}; move them to ${GCCPATH} to use them"
  else
    echo "Merging generated profiles"
    for name in cc1 cc1plus; do
      if ! merge_profiles "${name}"; then
        echo "Could not merge profiles for ${name}" >&2
        return 1
      fi
    done
    cd "${TOPLEV}" || return 1
    for name in cc1 cc1plus; do
      echo "Optimizing ${name} with the generated profile"
      if ! bolt_optimize "${name}" "${DATA}/${name}/${name}-combined.fdata" -relocs; then
        echo "Could not optimize binary for ${name}" >&2
        return 1
      fi
    done
    echo "mooving bolted binary"
    sudo mv -- "${TOPLEV}/cc1plus" "${GCCPATH}/cc1plus" || return 1
    sudo mv -- "${TOPLEV}/cc1" "${GCCPATH}/cc1" || return 1
    echo "Bolted binaries installed in ${GCCPATH}"
  fi
}

# [[ ]] with a quoted operand stays valid when STAGE is empty.
if [[ "${STAGE}" == 2 ]]; then
  run_stage2 || exit 1
fi