#!/usr/bin/env python3
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Dictionary with nodes' names and corresponding IP addresses
nodes_dict = {}
# Used for plots of parallel attacks (merged results): the maximum rate on a single node.
max_load_on_node = 600
# Used for plots of parallel attacks (merged results): the phases of the test, with the duration and total rate of each phase.
sectors_dict = {
    '0': {'duration': '300',
          'rate': '350'},
    '1': {'duration': '300',
          'rate': '550'},
    '2': {'duration': '300',
          'rate': '1050'},
}
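# Worked example of the mapping above, assuming one row per request in the
# merged results file (each sector contributes duration * rate rows):
#   sector '0': 300 s * 350 req/s  = 105000 rows (indices 0..104999)
#   sector '1': 300 s * 550 req/s  = 165000 rows (indices 105000..269999)
#   sector '2': 300 s * 1050 req/s = 315000 rows (indices 270000..584999)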
'''
Read a json file containing a vegeta export.
This function takes only one param: the path of the json file.
It returns a dataframe containing status codes, timestamps, X-Server header values and URLs.
Each row of the returned dataframe represents one request sent, together with its response.
'''
def read_json(path):
    code_stream = os.popen('cat ' + path + ' | jq -r .code')
    url_stream = os.popen('cat ' + path + ' | jq -r .url')
    timestamp_stream = os.popen('cat ' + path + ' | jq -r .timestamp')
    x_server_ok_stream = os.popen('cat ' + path + ' | jq -r \'select(.code == 200) | .headers."X-Server"[]\'')
    timestamp_ok_stream = os.popen('cat ' + path + ' | jq -r \'select(.code == 200) | .timestamp\'')
    codes = code_stream.read().splitlines()
    timestamp = timestamp_stream.read().splitlines()
    x_server_ok = x_server_ok_stream.read().splitlines()
    urls = url_stream.read().splitlines()
    timestamp_ok = timestamp_ok_stream.read().splitlines()
    df = pd.DataFrame.from_dict(
        {"timestamp": pd.to_datetime(timestamp), "code": codes, "url": urls})
    df_x_server_ok = pd.DataFrame.from_dict(
        {"timestamp": pd.to_datetime(timestamp_ok), "X-Server": x_server_ok})
    df = df.join(df_x_server_ok.set_index('timestamp'), on='timestamp')
    df.sort_values("timestamp", inplace=True)
    df.reset_index(drop=True, inplace=True)
    return df
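# For reference, a single line of the vegeta export looks roughly like the
# record below (illustrative, trimmed to the fields this script reads; the
# exact field set depends on the vegeta version). Header values are arrays,
# which is why the jq filter above ends with .headers."X-Server"[]:
#   {"code": 200,
#    "timestamp": "2022-01-01T10:00:00.000000000Z",
#    "url": "http://10.0.0.1/",
#    "headers": {"X-Server": ["10.0.0.1"]}}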
'''
Compute one column for each parameter. Each of these columns contains 1 for the rows matching the corresponding condition, 0 otherwise.
"node_x_server" columns specify whether the node replied; "node_url_ok" ("node_url_err") columns specify whether the request directed to the node
was (wasn't) successfully processed; "node_forwarded_req" columns specify whether the request directed to the node was forwarded to another node
and then successfully processed.
'''
def compute_df_columns(df):
    df["Success"] = np.where(df["code"] == '200', 1, 0)
    df["Error"] = np.where(df["code"] != '200', 1, 0)
    for node in nodes_dict:
        col_x_server = node + "_x_server"
        df[col_x_server] = np.where(df["X-Server"] == nodes_dict[node], 1, 0)
        col_url_ok = node + "_url_ok"
        col_url_err = node + "_url_err"
        # regex=False: match the node's address literally, so the dots in an IP aren't treated as regex wildcards.
        df[col_url_ok] = np.where((df["url"].str.contains(nodes_dict[node], regex=False)) & (df["code"] == '200'), 1, 0)
        df[col_url_err] = np.where((df["url"].str.contains(nodes_dict[node], regex=False)) & (df["code"] != '200'), 1, 0)
        col_forwarded_req = node + "_forwarded_req"
        df[col_forwarded_req] = np.where((df["url"].str.contains(nodes_dict[node], regex=False)) & (df["code"] == '200') & (df["X-Server"] != nodes_dict[node]), 1, 0)
    return df
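# Sketch of the columns generated above, assuming (hypothetically) that NODES
# assigned "Node light" the address 10.0.0.1:
#   "Node light_x_server"      -> 1 if the response carried X-Server: 10.0.0.1
#   "Node light_url_ok"        -> 1 if a request sent to 10.0.0.1 returned 200
#   "Node light_url_err"       -> 1 if a request sent to 10.0.0.1 failed
#   "Node light_forwarded_req" -> 1 if a request sent to 10.0.0.1 returned 200
#                                 but was answered by a different node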
'''
Group rows by 'rate', obtaining one row for each second of the test.
This function takes as input the source dataframe, the rate of the attack and whether the input file is a "merged results" file, and returns the grouped dataframe.
'''
def group_by_resp_for_seconds(df, rate, is_merged):
    url_ok_cols = []
    url_err_cols = []
    x_server_cols = []
    forwarded_req_cols = []
    df_sectors = []
    for node in nodes_dict:
        x_server_cols.append(node + "_x_server")
        url_ok_cols.append(node + "_url_ok")
        url_err_cols.append(node + "_url_err")
        forwarded_req_cols.append(node + "_forwarded_req")
    if not is_merged:
        df_by_second = df.groupby(lambda x: x // rate)[['Success', 'Error'] + x_server_cols + url_ok_cols + url_err_cols + forwarded_req_cols].sum()
    else:
        start_index = 0
        for sector in sectors_dict:
            rate = int(sectors_dict[sector]['rate'])
            rows_num = int(sectors_dict[sector]['duration']) * rate
            end_index = start_index + rows_num
            # reset_index so the slice is indexed from 0 and each group of 'rate' rows spans exactly one second of the sector.
            df_sectors.append(df[start_index:end_index].reset_index(drop=True).groupby(lambda x: x // rate)[['Success', 'Error'] + x_server_cols + url_ok_cols + url_err_cols + forwarded_req_cols].sum())
            start_index = end_index
        df_by_second = pd.concat(df_sectors, ignore_index=True)
    return df_by_second
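# Minimal sketch of the grouping arithmetic: with rate = 350, index // 350
# maps rows 0..349 to group 0 (second 0), rows 350..699 to group 1 (second 1),
# and so on, so each output row is the per-second sum of the indicator columns.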
'''
Compute nodes' responses subplots.
'''
def compute_nodes_subplots(df, cols, labels, path, max_y, width, height):
    colors = plt.rcParams["axes.prop_cycle"]()
    f, axes = plt.subplots(len(cols), 1, sharey=True)
    f.set_size_inches(width, height)
    f.supxlabel('Time (s)')
    f.supylabel('Response rate [resp/s]')
    for i in range(len(cols)):
        c = next(colors)["color"]
        axes[i].set_ylim(0, max_y)
        axes[i].plot(df.index, df[cols[i]], color=c, label=labels[i])
        axes[i].grid()
    f.legend(loc='upper right')
    plt.savefig(path)
'''
Compute nodes' responses and cumulatives subplots.
'''
def compute_nodes_subplots_and_cumulatives(df, cols, labels, path, max_y, width, height, log=False):
    colors = plt.rcParams["axes.prop_cycle"]()
    f, axes = plt.subplots(len(cols) - 1, 1)
    f.set_size_inches(width, height)
    f.supxlabel('Time (s)')
    f.supylabel('Response rate [resp/s]')
    for i in range(len(cols) - 2):
        c = next(colors)["color"]
        axes[i].set_ylim(0, max_y)
        axes[i].plot(df.index, df[cols[i]], color=c, label=labels[i])
        axes[i].grid()
    f.legend(loc='upper right')
    c = next(colors)["color"]
    if log:
        axes[len(cols) - 2].set_yscale('log')
    axes[len(cols) - 2].set_ylim(0, 100000)
    axes[len(cols) - 2].plot(df.index, np.cumsum(df[cols[len(cols) - 2]]), color='purple', label='Success')
    axes[len(cols) - 2].plot(df.index, np.cumsum(df[cols[len(cols) - 1]]), color='red', label='Error')
    axes[len(cols) - 2].set(ylabel='Cumulatives')
    axes[len(cols) - 2].legend(loc='upper right')
    axes[len(cols) - 2].grid()
    plt.savefig(path)
'''
Compute nodes' client side success rate subplots with forwarded requests.
'''
def compute_nodes_subplots_with_forwarded_req(df, cols, labels, path, max_y, width, height):
    colors = plt.rcParams["axes.prop_cycle"]()
    f, axes = plt.subplots(len(cols) // 2, 1, sharey=True)
    f.set_size_inches(width, height)
    f.supxlabel('Time (s)')
    f.supylabel('Client side success rate [req/s]')
    for i in range(len(cols) // 2):
        c = next(colors)["color"]
        axes[i].set_ylim(0, max_y)
        axes[i].plot(df.index, df[cols[i]], color=c, label=labels[i])
        axes[i].plot(df.index, df[cols[i + len(cols) // 2]], color='gray', linestyle='dotted')
        axes[i].grid()
    f.legend(loc='upper right')
    plt.savefig(path)
'''
Compute nodes' cumulatives subplots.
'''
def compute_nodes_cumulatives_subplots(df, cols_succ, cols_err, path, width, height, log=False):
    colors = plt.rcParams["axes.prop_cycle"]()
    f, axes = plt.subplots(len(cols_succ), 1, sharey=True)
    f.set_size_inches(width, height)
    f.supxlabel('Time (s)')
    f.supylabel('Cumulatives')
    for i in range(len(cols_succ)):
        c = next(colors)["color"]
        if log:
            axes[i].set_yscale('log')
        axes[i].title.set_text(list(nodes_dict.keys())[i])
        axes[i].plot(df.index, np.cumsum(df[cols_succ[i]]), color=c, label='Success')
        axes[i].plot(df.index, np.cumsum(df[cols_err[i]]), color='red', label='Error')
        axes[i].legend(loc='upper right')
        axes[i].grid()
    plt.savefig(path)
'''
Compute cumulatives plot.
'''
def compute_cumulatives_plot(df, cols, labels, path, width, height, log=False):
    plt.figure(figsize=(width, height))
    plt.xlabel("Time (s)")
    plt.ylabel("Cumulatives")
    if log:
        plt.yscale('log')
    for i in range(len(cols)):
        plt.plot(df.index, np.cumsum(df[cols[i]]), label=labels[i])
    plt.legend(loc="upper right")
    plt.grid()
    plt.savefig(path)
'''
Compute report with success rate values.
'''
def compute_report(input_file_path, output_dir_path, is_merged):
    succ_responses_num = int(os.popen('cat ' + input_file_path + ' | jq -r \'select(.code == 200) | .code\' | jq -s \'. | length\'').read())
    total_responses = int(os.popen('cat ' + input_file_path + ' | jq -r \'.code\' | jq -s \'. | length\'').read())
    success_rate = (succ_responses_num / total_responses) * 100
    if is_merged:
        nodes_succ_rate = {}
        for node in nodes_dict:
            succ_resp_num = int(os.popen('cat ' + input_file_path + ' | jq -r \'select((.code == 200) and (.url | contains("' + nodes_dict[node] + '"))) | .code\' | jq -s \'. | length\'').read())
            tot_req = int(os.popen('cat ' + input_file_path + ' | jq -r \'select(.url | contains("' + nodes_dict[node] + '")) | .code\' | jq -s \'. | length\'').read())
            if tot_req > 0:
                nodes_succ_rate[node] = (succ_resp_num / tot_req) * 100
    with open(output_dir_path + "/report_values.txt", "w") as text_file:
        text_file.write(f'Success rate: {success_rate:.2f}%\n')
        if is_merged:
            for node in nodes_succ_rate:
                text_file.write(f'Success rate {node}: {nodes_succ_rate[node]:.2f}%\n')
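# The resulting report_values.txt has the following shape (values are
# illustrative; per-node lines appear only for merged results):
#   Success rate: 97.42%
#   Success rate Node light: 99.10%
#   Success rate Node mid: 98.03%
#   Success rate Node heavy: 95.12%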
'''
The script takes the following parameters as input: the 'results.json' file path, the output directory, the rate of the attack
and a boolean parameter that specifies whether the input file was obtained by merging the results files of multiple parallel attacks.
If merged is true, the rate parameter isn't used and can be set to any value.
'''
def main(argv):
    # Parse the boolean 'merged' flag without eval(), accepting e.g. "True" or "true".
    file_input, output_dir, rate, merged = str(argv[0]), str(argv[1]), int(argv[2]), str(argv[3]).lower() == 'true'
    # Read the dataset from the json file.
    df1 = read_json(file_input)
    # Calculate additional columns.
    df1 = compute_df_columns(df1)
    # Group rows by 'rate' --> one row for each second.
    df_by_second1 = group_by_resp_for_seconds(df1, rate, merged)
    # Compute plots
    if not merged:
        cols = []
        for node in nodes_dict:
            cols.append(node + "_x_server")
        compute_nodes_subplots_and_cumulatives(df_by_second1, cols + ['Success', 'Error'], list(nodes_dict.keys()) + ['Success', 'Error'], output_dir + '/plot-nodes-responses-and-cumulatives.png', rate + 50, 10, 15, log=True)
        compute_nodes_subplots(df_by_second1, cols, list(nodes_dict.keys()), output_dir + '/plot-nodes-responses.png', rate + 50, 10, 10)
        compute_cumulatives_plot(df_by_second1, ['Success', 'Error'], ['Success', 'Error'], output_dir + '/plot-cumulatives.png', 10, 5)
        compute_cumulatives_plot(df_by_second1, ['Success', 'Error'], ['Success', 'Error'], output_dir + '/plot-cumulatives-log.png', 10, 5, log=True)
    else:
        cols_succ = []
        cols_err = []
        cols_forwarded = []
        cols = []
        for node in nodes_dict:
            cols_succ.append(node + "_url_ok")
            cols_err.append(node + "_url_err")
            cols_forwarded.append(node + "_forwarded_req")
            cols.append(node + "_x_server")
            compute_cumulatives_plot(df_by_second1, [node + "_url_ok", node + "_url_err"], ['Success', 'Error'], output_dir + f'/plot-cumulatives-log-{node}.png', 10, 5, log=True)
        compute_nodes_subplots_with_forwarded_req(df_by_second1, cols_succ + cols_forwarded, list(nodes_dict.keys()), output_dir + '/plot-succ-by-node-with-forwarded.png', max_load_on_node + 20, 10, 10)
        compute_nodes_subplots(df_by_second1, cols_succ, list(nodes_dict.keys()), output_dir + '/plot-succ-by-node.png', max_load_on_node + 20, 10, 10)
        compute_nodes_subplots(df_by_second1, cols_err, list(nodes_dict.keys()), output_dir + '/plot-err-by-node.png', max_load_on_node + 20, 10, 10)
        compute_nodes_cumulatives_subplots(df_by_second1, cols_succ, cols_err, output_dir + '/plot-cumulatives-by-node.png', 10, 10)
        compute_nodes_cumulatives_subplots(df_by_second1, cols_succ, cols_err, output_dir + '/plot-cumulatives-by-node-log.png', 10, 10, log=True)
        compute_nodes_subplots(df_by_second1, cols, list(nodes_dict.keys()), output_dir + '/plot-nodes-responses.png', max_load_on_node + 20, 10, 10)
        compute_nodes_subplots_and_cumulatives(df_by_second1, cols + ['Success', 'Error'], list(nodes_dict.keys()) + ['Success', 'Error'], output_dir + '/plot-nodes-responses-and-cumulatives.png', max_load_on_node + 20, 10, 15, log=True)
    # Compute report with success rates
    compute_report(file_input, output_dir, is_merged=merged)
if __name__ == '__main__':
    raw_nodes = os.getenv("NODES")
    if raw_nodes is None:
        print("Missing 'NODES' env variable, exiting...")
        sys.exit(1)
    nodes = raw_nodes.split(":")
    if len(nodes) != 3:
        print("Only 3 nodes are supported, exiting...")
        sys.exit(1)
    nodes_dict["Node light"] = nodes[0]
    nodes_dict["Node mid"] = nodes[1]
    nodes_dict["Node heavy"] = nodes[2]
    main(sys.argv[1:])
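# Example invocations (hypothetical addresses and paths):
#   NODES="10.0.0.1:10.0.0.2:10.0.0.3" ./plot-results.py results.json ./plots 350 False
# For a merged results file the rate argument is ignored and can be anything:
#   NODES="10.0.0.1:10.0.0.2:10.0.0.3" ./plot-results.py merged-results.json ./plots 0 True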