################################################################################
# Example : perform live fire detection in video using superpixel localization
# and the superpixel-trained versions of the InceptionV1-OnFire,
# InceptionV3-OnFire and InceptionV4-OnFire CNN models
# Copyright (c) 2017/18 - Andrew Dunnings / Toby Breckon, Durham University, UK
# Copyright (c) 2019/20 - Ganesh Samarth / Toby Breckon, Durham University, UK
# License : https://github.com/tobybreckon/fire-detection-cnn/blob/master/LICENSE
################################################################################
import cv2
import os
import sys
import math
import numpy as np
import argparse
################################################################################
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d, avg_pool_2d, global_avg_pool
from tflearn.layers.normalization import local_response_normalization, batch_normalization
from tflearn.layers.merge_ops import merge
from tflearn.layers.estimator import regression
################################################################################
from inceptionVxOnFire import construct_inceptionv1onfire, construct_inceptionv3onfire, construct_inceptionv4onfire
################################################################################
# extract non-zero region of interest (ROI) in an otherwise zero'd image
def extract_bounded_nonzero(input):

    # take the first channel only (for speed)

    gray = input[:, :, 0]

    # find the bounding rectangle of the non-zero region in a numpy array
    # credit: https://stackoverflow.com/questions/31400769/bounding-box-of-numpy-array

    rows = np.any(gray, axis=1)
    cols = np.any(gray, axis=0)
    rmin, rmax = np.where(rows)[0][[0, -1]]
    cmin, cmax = np.where(cols)[0][[0, -1]]

    # crop to the non-zero region (rows index the first image axis, columns the second)

    return input[rmin:rmax + 1, cmin:cmax + 1]
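
# illustrative example (hypothetical values): for a single-channel 3 x 3 region
#   [[0, 0, 0], [0, 9, 0], [0, 0, 0]]
# np.any over axis 1 gives [False, True, False], so rmin = rmax = 1 (and likewise
# for the columns), yielding a 1 x 1 crop around the only non-zero pixel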
################################################################################
# pad a supplied multi-channel image to the required (height, width, channels) size

def pad_image(image, new_width, new_height, pad_value=0):

    # create an image filled with the pad value, the same size as the padding target
    # (N.B. numpy image shape ordering is (rows, columns, channels) = (height, width, channels))

    padded = np.full((new_height, new_width, image.shape[2]), pad_value, dtype=np.uint8)

    # compute where our input image will go to centre it within the padded image

    pos_x = int(np.round((new_width / 2) - (image.shape[1] / 2)))
    pos_y = int(np.round((new_height / 2) - (image.shape[0] / 2)))

    # copy across the data from the input to the position centred within the padded image

    padded[pos_y:image.shape[0] + pos_y, pos_x:image.shape[1] + pos_x] = image

    return padded
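
# example usage (hypothetical sizes): centre a 40 x 30 x 3 superpixel crop within
# a 224 x 224 x 3 black image, ready for CNN input:
#   padded = pad_image(crop, 224, 224)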
################################################################################
# parse command line arguments
parser = argparse.ArgumentParser(description='Perform superpixel-based InceptionV1/V3/V4 fire detection on incoming video')
parser.add_argument("-m", "--model_to_use", type=int, help="specify model to use", default=1, choices={1, 3, 4})
parser.add_argument('video_file', metavar='video_file', type=str, help='specify video file')
args = parser.parse_args()
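
# example invocation (assuming the pre-trained models are available under models/
# as per the repository instructions, and some local video file, here test.mp4):
#
#   python superpixel-inceptionVxOnFire.py -m 3 test.mp4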
# construct and display model
print("Constructing SP-InceptionV" + str(args.model_to_use) + "-OnFire ...")
if (args.model_to_use == 1):

    # use InceptionV1-OnFire CNN model - [Dunnings/Breckon, 2018]

    model = construct_inceptionv1onfire(224, 224, training=False)

    # also work around the typo in the naming of the original V1 models [Dunnings/Breckon, 2018] "...iononv..."

    model.load(os.path.join("models/SP-InceptionV1-OnFire", "sp-inceptiononv1onfire"), weights_only=True)

elif (args.model_to_use == 3):

    # use InceptionV3-OnFire CNN model - [Samarth/Bhowmik/Breckon, 2019]
    # N.B. weights_only=False as we are using Batch Normalization, and need those weights loaded also

    model = construct_inceptionv3onfire(224, 224, training=False)
    model.load(os.path.join("models/SP-InceptionV3-OnFire", "sp-inceptionv3onfire"), weights_only=False)

elif (args.model_to_use == 4):

    # use InceptionV4-OnFire CNN model - [Samarth/Bhowmik/Breckon, 2019]
    # N.B. weights_only=False as we are using Batch Normalization, and need those weights loaded also

    model = construct_inceptionv4onfire(224, 224, training=False)
    model.load(os.path.join("models/SP-InceptionV4-OnFire", "sp-inceptionv4onfire"), weights_only=False)

print("Loaded CNN network weights ...")
################################################################################
# network input sizes
rows = 224
cols = 224
# display and loop settings
windowName = "Live Fire Detection - Superpixels with SP-InceptionV" + str(args.model_to_use) + "-OnFire"
keepProcessing = True
################################################################################
# load video file from command line argument, checking it opened successfully

video = cv2.VideoCapture(args.video_file)
if not video.isOpened():
    print("... failed to open video file: " + args.video_file)
    sys.exit(1)
print("Loaded video ...")
# create window
cv2.namedWindow(windowName, cv2.WINDOW_NORMAL)
# get video properties
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = video.get(cv2.CAP_PROP_FPS)

# inter-frame display time in ms (falling back to 25 fps if the source reports no valid fps)

frame_time = round(1000 / fps) if fps > 0 else 40
while (keepProcessing):

    # start a timer (to see how long processing and display takes)

    start_t = cv2.getTickCount()

    # get video frame from file, handle end of file

    ret, frame = video.read()
    if not ret:
        print("... end of video file reached")
        break

    # re-size image to network input size
    # (N.B. cv2.resize takes dsize as (width, height) and interpolation as a keyword argument)

    small_frame = cv2.resize(frame, (cols, rows), interpolation=cv2.INTER_AREA)
    # OpenCV ximgproc SLIC superpixels implementation below

    slic = cv2.ximgproc.createSuperpixelSLIC(small_frame, region_size=22)
    slic.iterate(10)
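
    # N.B. region_size sets the (approximate) target superpixel size in pixels,
    # and iterate(10) runs 10 refinement iterations of the SLIC algorithm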
    # getLabels method returns the different superpixel segments

    segments = slic.getLabels()
    # print(len(np.unique(segments)))
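
    # segments is a 2D integer label image the same size as small_frame;
    # segments[y, x] gives the id of the superpixel containing pixel (x, y)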
    # loop over the unique segment values

    for (i, segVal) in enumerate(np.unique(segments)):

        # construct a mask for the segment

        mask = np.zeros(small_frame.shape[:2], dtype="uint8")
        mask[segments == segVal] = 255

        # get contours (first checking if OpenCV >= 4.x, as 3.x findContours returns an extra image value)

        if (int(cv2.__version__.split(".")[0]) >= 4):
            contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        else:
            im2, contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        # create the superpixel by applying the mask
        # N.B. this creates an image of the full frame with this superpixel being the only non-zero
        # (i.e. not black) region. CNN training/testing classification is performed using these
        # full frame size images, rather than isolated small superpixel images.
        # Using this approach, we re-use the same InceptionV1-OnFire architecture as described in
        # the paper [Dunnings/Breckon, 2018] with no changes, trained on full frame images each
        # containing an isolated superpixel with the rest of the image being zero/black.

        superpixel = cv2.bitwise_and(small_frame, small_frame, mask=mask)
        # N.B. ... but for the later work using the InceptionV3-OnFire and InceptionV4-OnFire architectures
        # as described in the paper [Samarth/Bhowmik/Breckon, 2019] we instead centre and pad the resulting
        # image with zeros

        if ((args.model_to_use == 3) or (args.model_to_use == 4)):

            # convert the superpixel from BGR to RGB space

            superpixel = cv2.cvtColor(superpixel, cv2.COLOR_BGR2RGB)

            # centre and pad the superpixel within a (224 x 224 x 3) RGB image

            superpixel = pad_image(extract_bounded_nonzero(superpixel), 224, 224)
        # use loaded model to make prediction on given superpixel segment
        # which is now:
        # - an image (tensor) of dimension 224 x 224 x 3 (constructed from the superpixel as per above)
        # - for InceptionV1-OnFire: a 3 channel colour image with channel ordering BGR (not RGB)
        # - for InceptionV3-OnFire / InceptionV4-OnFire: a 3 channel colour image with channel ordering RGB
        # - un-normalised (i.e. pixel range going into network is 0->255)

        output = model.predict([superpixel])
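
        # output contains one two-element prediction per input image; following
        # the label convention used below, output[0][0] is treated as the fire
        # class score for this superpixel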
        # we know the green/red label convention seems back-to-front here (i.e.
        # green means fire, red means no fire) but this is how we did it
        # in the paper (?!) so we'll just keep the same craziness for
        # consistency with the paper figures

        if round(output[0][0]) == 1: # equiv. to 0.5 threshold in [Dunnings/Breckon, 2018], [Samarth/Bhowmik/Breckon, 2019] test code

            # if prediction for FIRE was TRUE (rounds to 1), draw GREEN contour for superpixel

            cv2.drawContours(small_frame, contours, -1, (0, 255, 0), 1)
        else:

            # if prediction for FIRE was FALSE, draw RED contour for superpixel

            cv2.drawContours(small_frame, contours, -1, (0, 0, 255), 1)
    # stop the timer and convert to ms. (to see how long processing and display takes)

    stop_t = ((cv2.getTickCount() - start_t) / cv2.getTickFrequency()) * 1000

    # image display and key handling

    cv2.imshow(windowName, small_frame)

    # wait frame time or less, depending on the processing time taken (e.g. 1000 ms / 25 fps = 40 ms)

    key = cv2.waitKey(max(2, frame_time - int(math.ceil(stop_t)))) & 0xFF

    # press "x" to exit, press "f" for fullscreen display

    if (key == ord('x')):
        keepProcessing = False
    elif (key == ord('f')):
        cv2.setWindowProperty(windowName, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
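
# tidy up on exit: release the video source and close the display window

video.release()
cv2.destroyAllWindows()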
################################################################################