import tensorflow as tf
from keras import backend as K
from keras.engine.topology import InputSpec, Layer
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.models import Model, load_model
from keras.layers import Input, Lambda, Conv2D, MaxPooling2D, BatchNormalization, ELU, Reshape, Concatenate, Activation
from keras.regularizers import l2
from math import ceil
import numpy as np
from matplotlib import pyplot as plt
from collections import defaultdict
import warnings
import cv2
import random
import sklearn.utils
from copy import deepcopy
from PIL import Image
import csv
import os
#ssd_box_encode_decode_utils
def iou(boxes1, boxes2, coords='centroids'):
if len(boxes1.shape) > 2: raise ValueError("boxes1 must have rank either 1 or 2, but has rank {}.".format(len(boxes1.shape)))
if len(boxes2.shape) > 2: raise ValueError("boxes2 must have rank either 1 or 2, but has rank {}.".format(len(boxes2.shape)))
if len(boxes1.shape) == 1: boxes1 = np.expand_dims(boxes1, axis=0)
if len(boxes2.shape) == 1: boxes2 = np.expand_dims(boxes2, axis=0)
if not (boxes1.shape[1] == boxes2.shape[1] == 4): raise ValueError("It must be boxes1.shape[1] == boxes2.shape[1] == 4, but it is boxes1.shape[1] == {}, boxes2.shape[1] == {}.".format(boxes1.shape[1], boxes2.shape[1]))
if coords == 'centroids':
# TODO: Implement a version that uses fewer computation steps (that doesn't need conversion)
boxes1 = convert_coordinates(boxes1, start_index=0, conversion='centroids2minmax')
boxes2 = convert_coordinates(boxes2, start_index=0, conversion='centroids2minmax')
elif not (coords in {'minmax', 'corners'}):
raise ValueError("Unexpected value for `coords`. Supported values are 'minmax', 'corners' and 'centroids'.")
if coords in {'minmax', 'centroids'}:
intersection = np.maximum(0, np.minimum(boxes1[:,1], boxes2[:,1]) - np.maximum(boxes1[:,0], boxes2[:,0])) * np.maximum(0, np.minimum(boxes1[:,3], boxes2[:,3]) - np.maximum(boxes1[:,2], boxes2[:,2]))
union = (boxes1[:,1] - boxes1[:,0]) * (boxes1[:,3] - boxes1[:,2]) + (boxes2[:,1] - boxes2[:,0]) * (boxes2[:,3] - boxes2[:,2]) - intersection
elif coords == 'corners':
intersection = np.maximum(0, np.minimum(boxes1[:,2], boxes2[:,2]) - np.maximum(boxes1[:,0], boxes2[:,0])) * np.maximum(0, np.minimum(boxes1[:,3], boxes2[:,3]) - np.maximum(boxes1[:,1], boxes2[:,1]))
union = (boxes1[:,2] - boxes1[:,0]) * (boxes1[:,3] - boxes1[:,1]) + (boxes2[:,2] - boxes2[:,0]) * (boxes2[:,3] - boxes2[:,1]) - intersection
return intersection / union
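# A minimal sanity check for `iou` (illustrative usage added here, not part of
# the original file): two unit squares in 'corners' format that overlap in a
# 0.5 x 1.0 strip should give IoU = 0.5 / 1.5 = 1/3.
if __name__ == '__main__':
    box_a = np.array([0.0, 0.0, 1.0, 1.0])  # (xmin, ymin, xmax, ymax)
    box_b = np.array([0.5, 0.0, 1.5, 1.0])
    print(iou(box_a, box_b, coords='corners'))  # -> [0.33333333]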
def convert_coordinates(tensor, start_index, conversion):
ind = start_index
    tensor1 = np.copy(tensor).astype(np.float64)
if conversion == 'minmax2centroids':
tensor1[..., ind] = (tensor[..., ind] + tensor[..., ind+1]) / 2.0 # Set cx
tensor1[..., ind+1] = (tensor[..., ind+2] + tensor[..., ind+3]) / 2.0 # Set cy
tensor1[..., ind+2] = tensor[..., ind+1] - tensor[..., ind] # Set w
tensor1[..., ind+3] = tensor[..., ind+3] - tensor[..., ind+2] # Set h
elif conversion == 'centroids2minmax':
tensor1[..., ind] = tensor[..., ind] - tensor[..., ind+2] / 2.0 # Set xmin
tensor1[..., ind+1] = tensor[..., ind] + tensor[..., ind+2] / 2.0 # Set xmax
tensor1[..., ind+2] = tensor[..., ind+1] - tensor[..., ind+3] / 2.0 # Set ymin
tensor1[..., ind+3] = tensor[..., ind+1] + tensor[..., ind+3] / 2.0 # Set ymax
elif conversion == 'corners2centroids':
tensor1[..., ind] = (tensor[..., ind] + tensor[..., ind+2]) / 2.0 # Set cx
tensor1[..., ind+1] = (tensor[..., ind+1] + tensor[..., ind+3]) / 2.0 # Set cy
tensor1[..., ind+2] = tensor[..., ind+2] - tensor[..., ind] # Set w
tensor1[..., ind+3] = tensor[..., ind+3] - tensor[..., ind+1] # Set h
elif conversion == 'centroids2corners':
tensor1[..., ind] = tensor[..., ind] - tensor[..., ind+2] / 2.0 # Set xmin
tensor1[..., ind+1] = tensor[..., ind+1] - tensor[..., ind+3] / 2.0 # Set ymin
tensor1[..., ind+2] = tensor[..., ind] + tensor[..., ind+2] / 2.0 # Set xmax
tensor1[..., ind+3] = tensor[..., ind+1] + tensor[..., ind+3] / 2.0 # Set ymax
elif (conversion == 'minmax2corners') or (conversion == 'corners2minmax'):
tensor1[..., ind+1] = tensor[..., ind+2]
tensor1[..., ind+2] = tensor[..., ind+1]
else:
raise ValueError("Unexpected conversion value. Supported values are 'minmax2centroids', 'centroids2minmax', 'corners2centroids', 'centroids2corners', 'minmax2corners', and 'corners2minmax'.")
return tensor1
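# Quick round-trip check for `convert_coordinates` (illustrative, assumed
# usage): converting a centroids box (cx, cy, w, h) to corners and back
# should reproduce the input.
if __name__ == '__main__':
    box = np.array([50.0, 40.0, 20.0, 10.0])  # cx=50, cy=40, w=20, h=10
    corners = convert_coordinates(box, start_index=0, conversion='centroids2corners')
    print(corners)  # -> [40. 35. 60. 45.], i.e. (xmin, ymin, xmax, ymax)
    print(convert_coordinates(corners, start_index=0, conversion='corners2centroids'))  # -> the original box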
def convert_coordinates2(tensor, start_index, conversion):
ind = start_index
    tensor1 = np.copy(tensor).astype(np.float64)
if conversion == 'minmax2centroids':
M = np.array([[0.5, 0. , -1., 0.],
[0.5, 0. , 1., 0.],
[0. , 0.5, 0., -1.],
[0. , 0.5, 0., 1.]])
tensor1[..., ind:ind+4] = np.dot(tensor1[..., ind:ind+4], M)
elif conversion == 'centroids2minmax':
M = np.array([[ 1. , 1. , 0. , 0. ],
[ 0. , 0. , 1. , 1. ],
[-0.5, 0.5, 0. , 0. ],
[ 0. , 0. , -0.5, 0.5]]) # The multiplicative inverse of the matrix above
tensor1[..., ind:ind+4] = np.dot(tensor1[..., ind:ind+4], M)
else:
raise ValueError("Unexpected conversion value. Supported values are 'minmax2centroids' and 'centroids2minmax'.")
return tensor1
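# The matrix-based converter should agree with the elementwise one for the
# conversions both support (illustrative check, assumed usage):
if __name__ == '__main__':
    boxes = np.array([[10.0, 30.0, 20.0, 40.0]])  # (xmin, xmax, ymin, ymax)
    print(convert_coordinates(boxes, start_index=0, conversion='minmax2centroids'))   # -> [[20. 30. 20. 20.]]
    print(convert_coordinates2(boxes, start_index=0, conversion='minmax2centroids'))  # -> same result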
def greedy_nms(y_pred_decoded, iou_threshold=0.45, coords='corners'):
y_pred_decoded_nms = []
for batch_item in y_pred_decoded: # For the labels of each batch item...
boxes_left = np.copy(batch_item)
maxima = [] # This is where we store the boxes that make it through the non-maximum suppression
while boxes_left.shape[0] > 0: # While there are still boxes left to compare...
maximum_index = np.argmax(boxes_left[:,1]) # ...get the index of the next box with the highest confidence...
maximum_box = np.copy(boxes_left[maximum_index]) # ...copy that box and...
maxima.append(maximum_box) # ...append it to `maxima` because we'll definitely keep it
boxes_left = np.delete(boxes_left, maximum_index, axis=0) # Now remove the maximum box from `boxes_left`
if boxes_left.shape[0] == 0: break # If there are no boxes left after this step, break. Otherwise...
similarities = iou(boxes_left[:,2:], maximum_box[2:], coords=coords) # ...compare (IoU) the other left over boxes to the maximum box...
boxes_left = boxes_left[similarities <= iou_threshold] # ...so that we can remove the ones that overlap too much with the maximum box
y_pred_decoded_nms.append(np.array(maxima))
return y_pred_decoded_nms
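# Tiny illustration of `greedy_nms` (assumed usage, not part of the original
# file): two detections of the same object plus one distant detection; NMS
# should keep the higher-scoring overlapping box and the distant one.
if __name__ == '__main__':
    batch_item = np.array([[1, 0.9, 10, 10, 50, 50],       # [class, conf, xmin, ymin, xmax, ymax]
                           [1, 0.8, 12, 12, 52, 52],       # overlaps the first box heavily -> suppressed
                           [1, 0.7, 200, 200, 240, 240]])  # far away -> kept
    print(greedy_nms([batch_item], iou_threshold=0.45, coords='corners'))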
def _greedy_nms(predictions, iou_threshold=0.45, coords='corners'):
boxes_left = np.copy(predictions)
maxima = [] # This is where we store the boxes that make it through the non-maximum suppression
while boxes_left.shape[0] > 0: # While there are still boxes left to compare...
maximum_index = np.argmax(boxes_left[:,0]) # ...get the index of the next box with the highest confidence...
maximum_box = np.copy(boxes_left[maximum_index]) # ...copy that box and...
maxima.append(maximum_box) # ...append it to `maxima` because we'll definitely keep it
boxes_left = np.delete(boxes_left, maximum_index, axis=0) # Now remove the maximum box from `boxes_left`
if boxes_left.shape[0] == 0: break # If there are no boxes left after this step, break. Otherwise...
similarities = iou(boxes_left[:,1:], maximum_box[1:], coords=coords) # ...compare (IoU) the other left over boxes to the maximum box...
boxes_left = boxes_left[similarities <= iou_threshold] # ...so that we can remove the ones that overlap too much with the maximum box
return np.array(maxima)
def _greedy_nms2(predictions, iou_threshold=0.45, coords='corners'):
boxes_left = np.copy(predictions)
maxima = [] # This is where we store the boxes that make it through the non-maximum suppression
while boxes_left.shape[0] > 0: # While there are still boxes left to compare...
maximum_index = np.argmax(boxes_left[:,1]) # ...get the index of the next box with the highest confidence...
maximum_box = np.copy(boxes_left[maximum_index]) # ...copy that box and...
maxima.append(maximum_box) # ...append it to `maxima` because we'll definitely keep it
boxes_left = np.delete(boxes_left, maximum_index, axis=0) # Now remove the maximum box from `boxes_left`
if boxes_left.shape[0] == 0: break # If there are no boxes left after this step, break. Otherwise...
similarities = iou(boxes_left[:,2:], maximum_box[2:], coords=coords) # ...compare (IoU) the other left over boxes to the maximum box...
boxes_left = boxes_left[similarities <= iou_threshold] # ...so that we can remove the ones that overlap too much with the maximum box
return np.array(maxima)
def decode_y(y_pred,
confidence_thresh=0.01,
iou_threshold=0.45,
top_k=200,
input_coords='centroids',
normalize_coords=False,
img_height=None,
img_width=None):
if normalize_coords and ((img_height is None) or (img_width is None)):
raise ValueError("If relative box coordinates are supposed to be converted to absolute coordinates, the decoder needs the image size in order to decode the predictions, but `img_height == {}` and `img_width == {}`".format(img_height, img_width))
# 1: Convert the box coordinates from the predicted anchor box offsets to predicted absolute coordinates
y_pred_decoded_raw = np.copy(y_pred[:,:,:-8]) # Slice out the classes and the four offsets, throw away the anchor coordinates and variances, resulting in a tensor of shape `[batch, n_boxes, n_classes + 4 coordinates]`
if input_coords == 'centroids':
y_pred_decoded_raw[:,:,[-2,-1]] = np.exp(y_pred_decoded_raw[:,:,[-2,-1]] * y_pred[:,:,[-2,-1]]) # exp(ln(w(pred)/w(anchor)) / w_variance * w_variance) == w(pred) / w(anchor), exp(ln(h(pred)/h(anchor)) / h_variance * h_variance) == h(pred) / h(anchor)
y_pred_decoded_raw[:,:,[-2,-1]] *= y_pred[:,:,[-6,-5]] # (w(pred) / w(anchor)) * w(anchor) == w(pred), (h(pred) / h(anchor)) * h(anchor) == h(pred)
y_pred_decoded_raw[:,:,[-4,-3]] *= y_pred[:,:,[-4,-3]] * y_pred[:,:,[-6,-5]] # (delta_cx(pred) / w(anchor) / cx_variance) * cx_variance * w(anchor) == delta_cx(pred), (delta_cy(pred) / h(anchor) / cy_variance) * cy_variance * h(anchor) == delta_cy(pred)
y_pred_decoded_raw[:,:,[-4,-3]] += y_pred[:,:,[-8,-7]] # delta_cx(pred) + cx(anchor) == cx(pred), delta_cy(pred) + cy(anchor) == cy(pred)
y_pred_decoded_raw = convert_coordinates(y_pred_decoded_raw, start_index=-4, conversion='centroids2corners')
elif input_coords == 'minmax':
y_pred_decoded_raw[:,:,-4:] *= y_pred[:,:,-4:] # delta(pred) / size(anchor) / variance * variance == delta(pred) / size(anchor) for all four coordinates, where 'size' refers to w or h, respectively
y_pred_decoded_raw[:,:,[-4,-3]] *= np.expand_dims(y_pred[:,:,-7] - y_pred[:,:,-8], axis=-1) # delta_xmin(pred) / w(anchor) * w(anchor) == delta_xmin(pred), delta_xmax(pred) / w(anchor) * w(anchor) == delta_xmax(pred)
y_pred_decoded_raw[:,:,[-2,-1]] *= np.expand_dims(y_pred[:,:,-5] - y_pred[:,:,-6], axis=-1) # delta_ymin(pred) / h(anchor) * h(anchor) == delta_ymin(pred), delta_ymax(pred) / h(anchor) * h(anchor) == delta_ymax(pred)
y_pred_decoded_raw[:,:,-4:] += y_pred[:,:,-8:-4] # delta(pred) + anchor == pred for all four coordinates
y_pred_decoded_raw = convert_coordinates(y_pred_decoded_raw, start_index=-4, conversion='minmax2corners')
elif input_coords == 'corners':
y_pred_decoded_raw[:,:,-4:] *= y_pred[:,:,-4:] # delta(pred) / size(anchor) / variance * variance == delta(pred) / size(anchor) for all four coordinates, where 'size' refers to w or h, respectively
y_pred_decoded_raw[:,:,[-4,-2]] *= np.expand_dims(y_pred[:,:,-6] - y_pred[:,:,-8], axis=-1) # delta_xmin(pred) / w(anchor) * w(anchor) == delta_xmin(pred), delta_xmax(pred) / w(anchor) * w(anchor) == delta_xmax(pred)
y_pred_decoded_raw[:,:,[-3,-1]] *= np.expand_dims(y_pred[:,:,-5] - y_pred[:,:,-7], axis=-1) # delta_ymin(pred) / h(anchor) * h(anchor) == delta_ymin(pred), delta_ymax(pred) / h(anchor) * h(anchor) == delta_ymax(pred)
y_pred_decoded_raw[:,:,-4:] += y_pred[:,:,-8:-4] # delta(pred) + anchor == pred for all four coordinates
else:
raise ValueError("Unexpected value for `input_coords`. Supported input coordinate formats are 'minmax', 'corners' and 'centroids'.")
# 2: If the model predicts normalized box coordinates and they are supposed to be converted back to absolute coordinates, do that
if normalize_coords:
y_pred_decoded_raw[:,:,[-4,-2]] *= img_width # Convert xmin, xmax back to absolute coordinates
y_pred_decoded_raw[:,:,[-3,-1]] *= img_height # Convert ymin, ymax back to absolute coordinates
# 3: Apply confidence thresholding and non-maximum suppression per class
n_classes = y_pred_decoded_raw.shape[-1] - 4 # The number of classes is the length of the last axis minus the four box coordinates
y_pred_decoded = [] # Store the final predictions in this list
for batch_item in y_pred_decoded_raw: # `batch_item` has shape `[n_boxes, n_classes + 4 coords]`
pred = [] # Store the final predictions for this batch item here
for class_id in range(1, n_classes): # For each class except the background class (which has class ID 0)...
single_class = batch_item[:,[class_id, -4, -3, -2, -1]] # ...keep only the confidences for that class, making this an array of shape `[n_boxes, 5]` and...
threshold_met = single_class[single_class[:,0] > confidence_thresh] # ...keep only those boxes with a confidence above the set threshold.
if threshold_met.shape[0] > 0: # If any boxes made the threshold...
maxima = _greedy_nms(threshold_met, iou_threshold=iou_threshold, coords='corners') # ...perform NMS on them.
                maxima_output = np.zeros((maxima.shape[0], maxima.shape[1] + 1)) # Expand the last dimension by one element to have room for the class ID. This is now an array of shape `[n_boxes, 6]`
maxima_output[:,0] = class_id # Write the class ID to the first column...
maxima_output[:,1:] = maxima # ...and write the maxima to the other columns...
pred.append(maxima_output) # ...and append the maxima for this class to the list of maxima for this batch item.
# Once we're through with all classes, keep only the `top_k` maxima with the highest scores
if pred: # If there are any predictions left after confidence-thresholding...
pred = np.concatenate(pred, axis=0)
            if pred.shape[0] > top_k: # If there are more than `top_k` results left at this point (otherwise there is nothing to filter)...
top_k_indices = np.argpartition(pred[:,1], kth=pred.shape[0]-top_k, axis=0)[pred.shape[0]-top_k:] # ...get the indices of the `top_k` highest-score maxima...
pred = pred[top_k_indices] # ...and keep only those entries of `pred`...
y_pred_decoded.append(pred) # ...and now that we're done, append the array of final predictions for this batch item to the output list
return y_pred_decoded
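# Minimal synthetic check for `decode_y` (assumed usage): with all offsets at
# zero and all variances at one, the decoded box equals the anchor box, so a
# high-confidence prediction should survive thresholding and NMS unchanged.
if __name__ == '__main__':
    n_classes = 3  # background + 2 object classes
    # Layout per box: [class scores (3) | 4 offsets | 4 anchor coords | 4 variances]
    y_pred = np.zeros((1, 2, n_classes + 12))
    y_pred[0, :, -8:-4] = [10, 20, 30, 40]  # anchor (xmin, ymin, xmax, ymax)
    y_pred[0, :, -4:] = 1.0                 # variances
    y_pred[0, 0, 1] = 0.9                   # box 0: class 1 with confidence 0.9
    y_pred[0, 1, 0] = 1.0                   # box 1: pure background -> discarded
    print(decode_y(y_pred, confidence_thresh=0.5, input_coords='corners'))
    # -> [array([[ 1. ,  0.9, 10. , 20. , 30. , 40. ]])]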
def decode_y2(y_pred,
              confidence_thresh=0.5,
              iou_threshold=0.45,
              top_k='all',
              input_coords='centroids',
              normalize_coords=False,
              img_height=None,
              img_width=None):
    # The body of `decode_y2` is missing from this copy of the file; only the signature survives.
    raise NotImplementedError("The body of `decode_y2` is missing in this file.")
class SSDBoxEncoder:
    # NOTE: The class header and `__init__` signature were lost in this copy of
    # the file; they are reconstructed here from the attribute assignments and
    # validation checks in the body below. The default values mirror those of
    # `build_model` further down and are an assumption.
    def __init__(self,
                 img_height,
                 img_width,
                 n_classes,
                 predictor_sizes,
                 min_scale=0.1,
                 max_scale=0.9,
                 scales=None,
                 aspect_ratios_global=[0.5, 1.0, 2.0],
                 aspect_ratios_per_layer=None,
                 two_boxes_for_ar1=True,
                 steps=None,
                 offsets=None,
                 limit_boxes=True,
                 variances=[1.0, 1.0, 1.0, 1.0],
                 pos_iou_threshold=0.5,
                 neg_iou_threshold=0.3,
                 coords='centroids',
                 normalize_coords=False):
predictor_sizes = np.array(predictor_sizes)
if len(predictor_sizes.shape) == 1:
predictor_sizes = np.expand_dims(predictor_sizes, axis=0)
if (min_scale is None or max_scale is None) and scales is None:
raise ValueError("Either `min_scale` and `max_scale` or `scales` need to be specified.")
if scales:
if (len(scales) != len(predictor_sizes)+1): # Must be two nested `if` statements since `list` and `bool` cannot be combined by `&`
raise ValueError("It must be either scales is None or len(scales) == len(predictor_sizes)+1, but len(scales) == {} and len(predictor_sizes)+1 == {}".format(len(scales), len(predictor_sizes)+1))
scales = np.array(scales)
if np.any(scales <= 0):
raise ValueError("All values in `scales` must be greater than 0, but the passed list of scales is {}".format(scales))
else: # If no list of scales was passed, we need to make sure that `min_scale` and `max_scale` are valid values.
if not 0 < min_scale <= max_scale:
raise ValueError("It must be 0 < min_scale <= max_scale, but it is min_scale = {} and max_scale = {}".format(min_scale, max_scale))
if not (aspect_ratios_per_layer is None):
if (len(aspect_ratios_per_layer) != len(predictor_sizes)): # Must be two nested `if` statements since `list` and `bool` cannot be combined by `&`
raise ValueError("It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == len(predictor_sizes), but len(aspect_ratios_per_layer) == {} and len(predictor_sizes) == {}".format(len(aspect_ratios_per_layer), len(predictor_sizes)))
for aspect_ratios in aspect_ratios_per_layer:
if np.any(np.array(aspect_ratios) <= 0):
raise ValueError("All aspect ratios must be greater than zero.")
else:
if (aspect_ratios_global is None):
raise ValueError("At least one of `aspect_ratios_global` and `aspect_ratios_per_layer` must not be `None`.")
if np.any(np.array(aspect_ratios_global) <= 0):
raise ValueError("All aspect ratios must be greater than zero.")
if len(variances) != 4:
raise ValueError("4 variance values must be pased, but {} values were received.".format(len(variances)))
variances = np.array(variances)
if np.any(variances <= 0):
raise ValueError("All variances must be >0, but the variances given are {}".format(variances))
if neg_iou_threshold > pos_iou_threshold:
raise ValueError("It cannot be `neg_iou_threshold > pos_iou_threshold`.")
if not (coords == 'minmax' or coords == 'centroids' or coords == 'corners'):
raise ValueError("Unexpected value for `coords`. Supported values are 'minmax', 'corners' and 'centroids'.")
        if (not (steps is None)) and (len(steps) != len(predictor_sizes)):
            raise ValueError("You must provide exactly one step value per predictor layer.")
        if (not (offsets is None)) and (len(offsets) != len(predictor_sizes)):
            raise ValueError("You must provide exactly one offset value per predictor layer.")
self.img_height = img_height
self.img_width = img_width
self.n_classes = n_classes + 1
self.predictor_sizes = predictor_sizes
self.min_scale = min_scale
self.max_scale = max_scale
if (scales is None):
self.scales = np.linspace(self.min_scale, self.max_scale, len(self.predictor_sizes)+1)
else:
# If a list of scales is given explicitly, we'll use that instead of computing it from `min_scale` and `max_scale`.
self.scales = scales
if (aspect_ratios_per_layer is None):
self.aspect_ratios = [aspect_ratios_global] * len(predictor_sizes)
else:
# If aspect ratios are given per layer, we'll use those.
self.aspect_ratios = aspect_ratios_per_layer
self.two_boxes_for_ar1 = two_boxes_for_ar1
if (not steps is None):
self.steps = steps
else:
self.steps = [None] * len(predictor_sizes)
if (not offsets is None):
self.offsets = offsets
else:
self.offsets = [None] * len(predictor_sizes)
self.limit_boxes = limit_boxes
self.variances = variances
self.pos_iou_threshold = pos_iou_threshold
self.neg_iou_threshold = neg_iou_threshold
self.coords = coords
self.normalize_coords = normalize_coords
# Compute the number of boxes per cell.
if aspect_ratios_per_layer:
self.n_boxes = []
for aspect_ratios in aspect_ratios_per_layer:
if (1 in aspect_ratios) & two_boxes_for_ar1:
self.n_boxes.append(len(aspect_ratios) + 1)
else:
self.n_boxes.append(len(aspect_ratios))
else:
if (1 in aspect_ratios_global) & two_boxes_for_ar1:
self.n_boxes = len(aspect_ratios_global) + 1
else:
self.n_boxes = len(aspect_ratios_global)
# Compute the anchor boxes for all the predictor layers. We only have to do this once
# as the anchor boxes depend only on the model configuration, not on the input data.
        # For each conv predictor layer (i.e. for each scale factor), the tensors for that layer's
        # anchor boxes will have the shape `(feature_map_height, feature_map_width, n_boxes, 4)`.
        self.boxes_list = [] # This will contain the anchor boxes for each predictor layer.
self.wh_list_diag = [] # Box widths and heights for each predictor layer
self.steps_diag = [] # Horizontal and vertical distances between any two boxes for each predictor layer
self.offsets_diag = [] # Offsets for each predictor layer
self.centers_diag = [] # Anchor box center points as `(cy, cx)` for each predictor layer
for i in range(len(self.predictor_sizes)):
boxes, center, wh, step, offset = self.generate_anchor_boxes_for_layer(feature_map_size=self.predictor_sizes[i],
aspect_ratios=self.aspect_ratios[i],
this_scale=self.scales[i],
next_scale=self.scales[i+1],
this_steps=self.steps[i],
this_offsets=self.offsets[i],
diagnostics=True)
self.boxes_list.append(boxes)
self.wh_list_diag.append(wh)
self.steps_diag.append(step)
self.offsets_diag.append(offset)
self.centers_diag.append(center)
def generate_anchor_boxes_for_layer(self,
feature_map_size,
aspect_ratios,
this_scale,
next_scale,
this_steps=None,
this_offsets=None,
diagnostics=False):
# Compute box width and height for each aspect ratio.
# The shorter side of the image will be used to compute `w` and `h` using `scale` and `aspect_ratios`.
size = min(self.img_height, self.img_width)
        # Compute the box widths and heights for all aspect ratios
wh_list = []
for ar in aspect_ratios:
if (ar == 1):
# Compute the regular anchor box for aspect ratio 1.
box_height = box_width = this_scale * size
wh_list.append((box_width, box_height))
if self.two_boxes_for_ar1:
# Compute one slightly larger version using the geometric mean of this scale value and the next.
box_height = box_width = np.sqrt(this_scale * next_scale) * size
wh_list.append((box_width, box_height))
else:
box_width = this_scale * size * np.sqrt(ar)
box_height = this_scale * size / np.sqrt(ar)
wh_list.append((box_width, box_height))
wh_list = np.array(wh_list)
n_boxes = len(wh_list)
# Compute the grid of box center points. They are identical for all aspect ratios.
# Compute the step sizes, i.e. how far apart the anchor box center points will be vertically and horizontally.
if (this_steps is None):
step_height = self.img_height / feature_map_size[0]
step_width = self.img_width / feature_map_size[1]
else:
if isinstance(this_steps, (list, tuple)) and (len(this_steps) == 2):
step_height = this_steps[0]
step_width = this_steps[1]
elif isinstance(this_steps, (int, float)):
step_height = this_steps
step_width = this_steps
# Compute the offsets, i.e. at what pixel values the first anchor box center point will be from the top and from the left of the image.
if (this_offsets is None):
offset_height = 0.5
offset_width = 0.5
else:
if isinstance(this_offsets, (list, tuple)) and (len(this_offsets) == 2):
offset_height = this_offsets[0]
offset_width = this_offsets[1]
elif isinstance(this_offsets, (int, float)):
offset_height = this_offsets
offset_width = this_offsets
# Now that we have the offsets and step sizes, compute the grid of anchor box center points.
cy = np.linspace(offset_height * step_height, (offset_height + feature_map_size[0] - 1) * step_height, feature_map_size[0])
cx = np.linspace(offset_width * step_width, (offset_width + feature_map_size[1] - 1) * step_width, feature_map_size[1])
cx_grid, cy_grid = np.meshgrid(cx, cy)
cx_grid = np.expand_dims(cx_grid, -1) # This is necessary for np.tile() to do what we want further down
cy_grid = np.expand_dims(cy_grid, -1) # This is necessary for np.tile() to do what we want further down
# Create a 4D tensor template of shape `(feature_map_height, feature_map_width, n_boxes, 4)`
# where the last dimension will contain `(cx, cy, w, h)`
boxes_tensor = np.zeros((feature_map_size[0], feature_map_size[1], n_boxes, 4))
boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, n_boxes)) # Set cx
boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, n_boxes)) # Set cy
boxes_tensor[:, :, :, 2] = wh_list[:, 0] # Set w
boxes_tensor[:, :, :, 3] = wh_list[:, 1] # Set h
# Convert `(cx, cy, w, h)` to `(xmin, ymin, xmax, ymax)`
boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='centroids2corners')
# If `limit_boxes` is enabled, clip the coordinates to lie within the image boundaries
if self.limit_boxes:
x_coords = boxes_tensor[:,:,:,[0, 2]]
x_coords[x_coords >= self.img_width] = self.img_width - 1
x_coords[x_coords < 0] = 0
boxes_tensor[:,:,:,[0, 2]] = x_coords
y_coords = boxes_tensor[:,:,:,[1, 3]]
y_coords[y_coords >= self.img_height] = self.img_height - 1
y_coords[y_coords < 0] = 0
boxes_tensor[:,:,:,[1, 3]] = y_coords
        # If `normalize_coords` is enabled, normalize the coordinates to be within [0,1]
if self.normalize_coords:
boxes_tensor[:, :, :, [0, 2]] /= self.img_width
boxes_tensor[:, :, :, [1, 3]] /= self.img_height
# TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we don't have to unnecessarily convert back and forth.
if self.coords == 'centroids':
# Convert `(xmin, ymin, xmax, ymax)` back to `(cx, cy, w, h)`.
boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2centroids')
elif self.coords == 'minmax':
            # Convert `(xmin, ymin, xmax, ymax)` to `(xmin, xmax, ymin, ymax)`.
boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2minmax')
if diagnostics:
return boxes_tensor, (cy, cx), wh_list, (step_height, step_width), (offset_height, offset_width)
else:
return boxes_tensor
def generate_encode_template(self, batch_size, diagnostics=False):
# Tile the anchor boxes for each predictor layer across all batch items.
boxes_batch = []
for boxes in self.boxes_list:
# Prepend one dimension to `self.boxes_list` to account for the batch size and tile it along.
# The result will be a 5D tensor of shape `(batch_size, feature_map_height, feature_map_width, n_boxes, 4)`
boxes = np.expand_dims(boxes, axis=0)
boxes = np.tile(boxes, (batch_size, 1, 1, 1, 1))
# Now reshape the 5D tensor above into a 3D tensor of shape
# `(batch, feature_map_height * feature_map_width * n_boxes, 4)`. The resulting
# order of the tensor content will be identical to the order obtained from the reshaping operation
# in our Keras model (we're using the Tensorflow backend, and tf.reshape() and np.reshape()
# use the same default index order, which is C-like index ordering)
boxes = np.reshape(boxes, (batch_size, -1, 4))
boxes_batch.append(boxes)
# Concatenate the anchor tensors from the individual layers to one.
boxes_tensor = np.concatenate(boxes_batch, axis=1)
# 3: Create a template tensor to hold the one-hot class encodings of shape `(batch, #boxes, #classes)`
# It will contain all zeros for now, the classes will be set in the matching process that follows
classes_tensor = np.zeros((batch_size, boxes_tensor.shape[1], self.n_classes))
# 4: Create a tensor to contain the variances. This tensor has the same shape as `boxes_tensor` and simply
# contains the same 4 variance values for every position in the last axis.
variances_tensor = np.zeros_like(boxes_tensor)
variances_tensor += self.variances # Long live broadcasting
        # 5: Concatenate the classes, boxes and variances tensors to get our final template for y_encoded. We also need
# another tensor of the shape of `boxes_tensor` as a space filler so that `y_encode_template` has the same
# shape as the SSD model output tensor. The content of this tensor is irrelevant, we'll just use
# `boxes_tensor` a second time.
y_encode_template = np.concatenate((classes_tensor, boxes_tensor, boxes_tensor, variances_tensor), axis=2)
if diagnostics:
return y_encode_template, self.centers_diag, self.wh_list_diag, self.steps_diag, self.offsets_diag
else:
return y_encode_template
def encode_y(self, ground_truth_labels, diagnostics=False):
# 1: Generate the template for y_encoded
y_encode_template = self.generate_encode_template(batch_size=len(ground_truth_labels), diagnostics=False)
y_encoded = np.copy(y_encode_template) # We'll write the ground truth box data to this array
# 2: Match the boxes from `ground_truth_labels` to the anchor boxes in `y_encode_template`
# and for each matched box record the ground truth coordinates in `y_encoded`.
        # Every time there is no match for an anchor box, record `class_id` 0 in `y_encoded` for that anchor box.
class_vector = np.eye(self.n_classes) # An identity matrix that we'll use as one-hot class vectors
for i in range(y_encode_template.shape[0]): # For each batch item...
available_boxes = np.ones((y_encode_template.shape[1])) # 1 for all anchor boxes that are not yet matched to a ground truth box, 0 otherwise
negative_boxes = np.ones((y_encode_template.shape[1])) # 1 for all negative boxes, 0 otherwise
for true_box in ground_truth_labels[i]: # For each ground truth box belonging to the current batch item...
                true_box = true_box.astype(np.float64)
                if abs(true_box[3] - true_box[1]) < 0.001 or abs(true_box[4] - true_box[2]) < 0.001: continue # Protect ourselves against bad ground truth data: boxes with (near-)zero width or height
if self.normalize_coords:
true_box[[1,3]] /= self.img_width # Normalize xmin and xmax to be within [0,1]
true_box[[2,4]] /= self.img_height # Normalize ymin and ymax to be within [0,1]
if self.coords == 'centroids':
true_box = convert_coordinates(true_box, start_index=1, conversion='corners2centroids')
elif self.coords == 'minmax':
true_box = convert_coordinates(true_box, start_index=1, conversion='corners2minmax')
similarities = iou(y_encode_template[i,:,-12:-8], true_box[1:], coords=self.coords) # The iou similarities for all anchor boxes
negative_boxes[similarities >= self.neg_iou_threshold] = 0 # If a negative box gets an IoU match >= `self.neg_iou_threshold`, it's no longer a valid negative box
similarities *= available_boxes # Filter out anchor boxes which aren't available anymore (i.e. already matched to a different ground truth box)
available_and_thresh_met = np.copy(similarities)
available_and_thresh_met[available_and_thresh_met < self.pos_iou_threshold] = 0 # Filter out anchor boxes which don't meet the iou threshold
assign_indices = np.nonzero(available_and_thresh_met)[0] # Get the indices of the left-over anchor boxes to which we want to assign this ground truth box
if len(assign_indices) > 0: # If we have any matches
y_encoded[i,assign_indices,:-8] = np.concatenate((class_vector[int(true_box[0])], true_box[1:]), axis=0) # Write the ground truth box coordinates and class to all assigned anchor box positions. Remember that the last four elements of `y_encoded` are just dummy entries.
available_boxes[assign_indices] = 0 # Make the assigned anchor boxes unavailable for the next ground truth box
else: # If we don't have any matches
best_match_index = np.argmax(similarities) # Get the index of the best iou match out of all available boxes
y_encoded[i,best_match_index,:-8] = np.concatenate((class_vector[int(true_box[0])], true_box[1:]), axis=0) # Write the ground truth box coordinates and class to the best match anchor box position
available_boxes[best_match_index] = 0 # Make the assigned anchor box unavailable for the next ground truth box
negative_boxes[best_match_index] = 0 # The assigned anchor box is no longer a negative box
# Set the classes of all remaining available anchor boxes to class zero
background_class_indices = np.nonzero(negative_boxes)[0]
y_encoded[i,background_class_indices,0] = 1
# 3: Convert absolute box coordinates to offsets from the anchor boxes and normalize them
if self.coords == 'centroids':
y_encoded[:,:,[-12,-11]] -= y_encode_template[:,:,[-12,-11]] # cx(gt) - cx(anchor), cy(gt) - cy(anchor)
y_encoded[:,:,[-12,-11]] /= y_encode_template[:,:,[-10,-9]] * y_encode_template[:,:,[-4,-3]] # (cx(gt) - cx(anchor)) / w(anchor) / cx_variance, (cy(gt) - cy(anchor)) / h(anchor) / cy_variance
y_encoded[:,:,[-10,-9]] /= y_encode_template[:,:,[-10,-9]] # w(gt) / w(anchor), h(gt) / h(anchor)
y_encoded[:,:,[-10,-9]] = np.log(y_encoded[:,:,[-10,-9]]) / y_encode_template[:,:,[-2,-1]] # ln(w(gt) / w(anchor)) / w_variance, ln(h(gt) / h(anchor)) / h_variance (ln == natural logarithm)
elif self.coords == 'corners':
y_encoded[:,:,-12:-8] -= y_encode_template[:,:,-12:-8] # (gt - anchor) for all four coordinates
y_encoded[:,:,[-12,-10]] /= np.expand_dims(y_encode_template[:,:,-10] - y_encode_template[:,:,-12], axis=-1) # (xmin(gt) - xmin(anchor)) / w(anchor), (xmax(gt) - xmax(anchor)) / w(anchor)
y_encoded[:,:,[-11,-9]] /= np.expand_dims(y_encode_template[:,:,-9] - y_encode_template[:,:,-11], axis=-1) # (ymin(gt) - ymin(anchor)) / h(anchor), (ymax(gt) - ymax(anchor)) / h(anchor)
y_encoded[:,:,-12:-8] /= y_encode_template[:,:,-4:] # (gt - anchor) / size(anchor) / variance for all four coordinates, where 'size' refers to w and h respectively
else:
y_encoded[:,:,-12:-8] -= y_encode_template[:,:,-12:-8] # (gt - anchor) for all four coordinates
y_encoded[:,:,[-12,-11]] /= np.expand_dims(y_encode_template[:,:,-11] - y_encode_template[:,:,-12], axis=-1) # (xmin(gt) - xmin(anchor)) / w(anchor), (xmax(gt) - xmax(anchor)) / w(anchor)
y_encoded[:,:,[-10,-9]] /= np.expand_dims(y_encode_template[:,:,-9] - y_encode_template[:,:,-10], axis=-1) # (ymin(gt) - ymin(anchor)) / h(anchor), (ymax(gt) - ymax(anchor)) / h(anchor)
y_encoded[:,:,-12:-8] /= y_encode_template[:,:,-4:] # (gt - anchor) / size(anchor) / variance for all four coordinates, where 'size' refers to w and h respectively
if diagnostics:
# Here we'll save the matched anchor boxes (i.e. anchor boxes that were matched to a ground truth box, but keeping the anchor box coordinates).
y_matched_anchors = np.copy(y_encoded)
y_matched_anchors[:,:,-12:-8] = 0 # Keeping the anchor box coordinates means setting the offsets to zero.
return y_encoded, y_matched_anchors
else:
return y_encoded
#ssd_box_encode_decode_utils complete
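# Illustrative end-to-end use of the encoder (added example; argument names
# follow the reconstructed `__init__` signature above, see the NOTE there):
# encode one ground truth box against a single 2x2 predictor layer.
if __name__ == '__main__':
    encoder = SSDBoxEncoder(img_height=300, img_width=300, n_classes=2,
                            predictor_sizes=[(2, 2)],
                            scales=[0.5, 0.9],
                            aspect_ratios_global=[1.0],
                            two_boxes_for_ar1=False)
    # Ground truth format per box: (class_id, xmin, ymin, xmax, ymax)
    labels = [np.array([[1, 50, 50, 200, 200]])]
    y_encoded = encoder.encode_y(labels)
    print(y_encoded.shape)  # -> (1, 4, 15): 4 anchors, 3 classes + 12 box/anchor/variance values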
#keras_layer_AnchorBoxes
class AnchorBoxes(Layer):
def __init__(self,
img_height,
img_width,
this_scale,
next_scale,
aspect_ratios=[0.5, 1.0, 2.0],
two_boxes_for_ar1=True,
this_steps=None,
this_offsets=None,
limit_boxes=True,
variances=[1.0, 1.0, 1.0, 1.0],
coords='centroids',
normalize_coords=False,
**kwargs):
if K.backend() != 'tensorflow':
raise TypeError("This layer only supports TensorFlow at the moment, but you are using the {} backend.".format(K.backend()))
        if (this_scale < 0) or (next_scale <= 0) or (this_scale > 1):
            raise ValueError("`this_scale` must be in [0, 1] and `next_scale` must be >0, but `this_scale` == {}, `next_scale` == {}".format(this_scale, next_scale))
        if len(variances) != 4:
            raise ValueError("4 variance values must be passed, but {} values were received.".format(len(variances)))
variances = np.array(variances)
if np.any(variances <= 0):
raise ValueError("All variances must be >0, but the variances given are {}".format(variances))
self.img_height = img_height
self.img_width = img_width
self.this_scale = this_scale
self.next_scale = next_scale
self.aspect_ratios = aspect_ratios
self.two_boxes_for_ar1 = two_boxes_for_ar1
self.this_steps = this_steps
self.this_offsets = this_offsets
self.limit_boxes = limit_boxes
self.variances = variances
self.coords = coords
self.normalize_coords = normalize_coords
# Compute the number of boxes per cell
if (1 in aspect_ratios) and two_boxes_for_ar1:
self.n_boxes = len(aspect_ratios) + 1
else:
self.n_boxes = len(aspect_ratios)
super(AnchorBoxes, self).__init__(**kwargs)
def build(self, input_shape):
self.input_spec = [InputSpec(shape=input_shape)]
super(AnchorBoxes, self).build(input_shape)
def call(self, x, mask=None):
# Compute box width and height for each aspect ratio
# The shorter side of the image will be used to compute `w` and `h` using `scale` and `aspect_ratios`.
size = min(self.img_height, self.img_width)
        # Compute the box widths and heights for all aspect ratios
wh_list = []
for ar in self.aspect_ratios:
if (ar == 1):
# Compute the regular anchor box for aspect ratio 1.
box_height = box_width = self.this_scale * size
wh_list.append((box_width, box_height))
if self.two_boxes_for_ar1:
# Compute one slightly larger version using the geometric mean of this scale value and the next.
box_height = box_width = np.sqrt(self.this_scale * self.next_scale) * size
wh_list.append((box_width, box_height))
else:
box_height = self.this_scale * size / np.sqrt(ar)
box_width = self.this_scale * size * np.sqrt(ar)
wh_list.append((box_width, box_height))
wh_list = np.array(wh_list)
# We need the shape of the input tensor
if K.image_dim_ordering() == 'tf':
batch_size, feature_map_height, feature_map_width, feature_map_channels = x._keras_shape
else: # Not yet relevant since TensorFlow is the only supported backend right now, but it can't harm to have this in here for the future
batch_size, feature_map_channels, feature_map_height, feature_map_width = x._keras_shape
# Compute the grid of box center points. They are identical for all aspect ratios.
# Compute the step sizes, i.e. how far apart the anchor box center points will be vertically and horizontally.
if (self.this_steps is None):
step_height = self.img_height / feature_map_height
step_width = self.img_width / feature_map_width
else:
if isinstance(self.this_steps, (list, tuple)) and (len(self.this_steps) == 2):
step_height = self.this_steps[0]
step_width = self.this_steps[1]
elif isinstance(self.this_steps, (int, float)):
step_height = self.this_steps
step_width = self.this_steps
# Compute the offsets, i.e. at what pixel values the first anchor box center point will be from the top and from the left of the image.
if (self.this_offsets is None):
offset_height = 0.5
offset_width = 0.5
else:
if isinstance(self.this_offsets, (list, tuple)) and (len(self.this_offsets) == 2):
offset_height = self.this_offsets[0]
offset_width = self.this_offsets[1]
elif isinstance(self.this_offsets, (int, float)):
offset_height = self.this_offsets
offset_width = self.this_offsets
# Now that we have the offsets and step sizes, compute the grid of anchor box center points.
cy = np.linspace(offset_height * step_height, (offset_height + feature_map_height - 1) * step_height, feature_map_height)
cx = np.linspace(offset_width * step_width, (offset_width + feature_map_width - 1) * step_width, feature_map_width)
cx_grid, cy_grid = np.meshgrid(cx, cy)
cx_grid = np.expand_dims(cx_grid, -1) # This is necessary for np.tile() to do what we want further down
cy_grid = np.expand_dims(cy_grid, -1) # This is necessary for np.tile() to do what we want further down
# Create a 4D tensor template of shape `(feature_map_height, feature_map_width, n_boxes, 4)`
# where the last dimension will contain `(cx, cy, w, h)`
boxes_tensor = np.zeros((feature_map_height, feature_map_width, self.n_boxes, 4))
boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, self.n_boxes)) # Set cx
boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, self.n_boxes)) # Set cy
boxes_tensor[:, :, :, 2] = wh_list[:, 0] # Set w
boxes_tensor[:, :, :, 3] = wh_list[:, 1] # Set h
        # Convert `(cx, cy, w, h)` to `(xmin, ymin, xmax, ymax)`
boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='centroids2corners')
# If `limit_boxes` is enabled, clip the coordinates to lie within the image boundaries
if self.limit_boxes:
x_coords = boxes_tensor[:,:,:,[0, 2]]
x_coords[x_coords >= self.img_width] = self.img_width - 1
x_coords[x_coords < 0] = 0
boxes_tensor[:,:,:,[0, 2]] = x_coords
y_coords = boxes_tensor[:,:,:,[1, 3]]
y_coords[y_coords >= self.img_height] = self.img_height - 1
y_coords[y_coords < 0] = 0
boxes_tensor[:,:,:,[1, 3]] = y_coords
# If `normalize_coords` is enabled, normalize the coordinates to be within [0,1]
if self.normalize_coords:
boxes_tensor[:, :, :, [0, 2]] /= self.img_width
boxes_tensor[:, :, :, [1, 3]] /= self.img_height
# TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we don't have to unnecessarily convert back and forth.
if self.coords == 'centroids':
# Convert `(xmin, ymin, xmax, ymax)` back to `(cx, cy, w, h)`.
boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2centroids')
elif self.coords == 'minmax':
            # Convert `(xmin, ymin, xmax, ymax)` to `(xmin, xmax, ymin, ymax)`.
boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2minmax')
# Create a tensor to contain the variances and append it to `boxes_tensor`. This tensor has the same shape
# as `boxes_tensor` and simply contains the same 4 variance values for every position in the last axis.
variances_tensor = np.zeros_like(boxes_tensor) # Has shape `(feature_map_height, feature_map_width, n_boxes, 4)`
variances_tensor += self.variances # Long live broadcasting
# Now `boxes_tensor` becomes a tensor of shape `(feature_map_height, feature_map_width, n_boxes, 8)`
boxes_tensor = np.concatenate((boxes_tensor, variances_tensor), axis=-1)
# Now prepend one dimension to `boxes_tensor` to account for the batch size and tile it along
# The result will be a 5D tensor of shape `(batch_size, feature_map_height, feature_map_width, n_boxes, 8)`
boxes_tensor = np.expand_dims(boxes_tensor, axis=0)
boxes_tensor = K.tile(K.constant(boxes_tensor, dtype='float32'), (K.shape(x)[0], 1, 1, 1, 1))
return boxes_tensor
def compute_output_shape(self, input_shape):
if K.image_dim_ordering() == 'tf':
batch_size, feature_map_height, feature_map_width, feature_map_channels = input_shape
else: # Not yet relevant since TensorFlow is the only supported backend right now, but it can't harm to have this in here for the future
batch_size, feature_map_channels, feature_map_height, feature_map_width = input_shape
return (batch_size, feature_map_height, feature_map_width, self.n_boxes, 8)
def get_config(self):
config = {
'img_height': self.img_height,
'img_width': self.img_width,
'this_scale': self.this_scale,
'next_scale': self.next_scale,
'aspect_ratios': list(self.aspect_ratios),
'two_boxes_for_ar1': self.two_boxes_for_ar1,
'limit_boxes': self.limit_boxes,
'variances': list(self.variances),
'coords': self.coords,
'normalize_coords': self.normalize_coords
}
base_config = super(AnchorBoxes, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
#keras_layer_AnchorBoxes complete
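# Minimal shape check for the AnchorBoxes layer (assumed usage, added for
# illustration): attach it to a small feature map and confirm the 5D output
# `(batch, feature_map_height, feature_map_width, n_boxes, 8)`.
if __name__ == '__main__':
    feat = Input(shape=(4, 4, 16))
    anchors = AnchorBoxes(img_height=128, img_width=128, this_scale=0.2,
                          next_scale=0.4, aspect_ratios=[0.5, 1.0, 2.0])(feat)
    print(Model(inputs=feat, outputs=anchors).output_shape)  # -> (None, 4, 4, 4, 8)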
#keras_ssd7
def build_model(image_size,
n_classes,
l2_regularization=0.0,
min_scale=0.1,
max_scale=0.9,
scales=None,
aspect_ratios_global=[0.5, 1.0, 2.0],
aspect_ratios_per_layer=None,
two_boxes_for_ar1=True,
steps=None,
offsets=None,
limit_boxes=True,
variances=[1.0, 1.0, 1.0, 1.0],
coords='centroids',
normalize_coords=False,
subtract_mean=None,
divide_by_stddev=None,
swap_channels=False,
return_predictor_sizes=False):
n_predictor_layers = 4
n_classes += 1
if aspect_ratios_global is None and aspect_ratios_per_layer is None:
raise ValueError("`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified.")
if aspect_ratios_per_layer:
if len(aspect_ratios_per_layer) != n_predictor_layers:
raise ValueError("It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}.".format(n_predictor_layers, len(aspect_ratios_per_layer)))
if (min_scale is None or max_scale is None) and scales is None:
raise ValueError("Either `min_scale` and `max_scale` or `scales` need to be specified.")
if scales:
if len(scales) != n_predictor_layers+1:
raise ValueError("It must be either scales is None or len(scales) == {}, but len(scales) == {}.".format(n_predictor_layers+1, len(scales)))
else:
scales = np.linspace(min_scale, max_scale, n_predictor_layers+1)
if len(variances) != 4:
raise ValueError("4 variance values must be pased, but {} values were received.".format(len(variances)))
variances = np.array(variances)
if np.any(variances <= 0):
raise ValueError("All variances must be >0, but the variances given are {}".format(variances))
    if (not (steps is None)) and (len(steps) != n_predictor_layers):
        raise ValueError("You must provide exactly one step value per predictor layer.")
    if (not (offsets is None)) and (len(offsets) != n_predictor_layers):
        raise ValueError("You must provide exactly one offset value per predictor layer.")
if aspect_ratios_per_layer:
aspect_ratios = aspect_ratios_per_layer
else:
aspect_ratios = [aspect_ratios_global] * n_predictor_layers
if aspect_ratios_per_layer:
n_boxes = []
for ar in aspect_ratios_per_layer:
if (1 in ar) & two_boxes_for_ar1:
n_boxes.append(len(ar) + 1) # +1 for the second box for aspect ratio 1
else:
n_boxes.append(len(ar))
else:
if (1 in aspect_ratios_global) & two_boxes_for_ar1:
n_boxes = len(aspect_ratios_global) + 1
else:
n_boxes = len(aspect_ratios_global)
n_boxes = [n_boxes] * n_predictor_layers
if steps is None:
steps = [None] * n_predictor_layers
if offsets is None:
offsets = [None] * n_predictor_layers
l2_reg = l2_regularization
img_height, img_width, img_channels = image_size[0], image_size[1], image_size[2]
x = Input(shape=(img_height, img_width, img_channels))
x1 = Lambda(lambda z: z,
output_shape=(img_height, img_width, img_channels),
                name='identity_layer')(x)
if not (subtract_mean is None):
x1 = Lambda(lambda z: z - np.array(subtract_mean),
output_shape=(img_height, img_width, img_channels),
name='input_mean_normalization')(x1)
if not (divide_by_stddev is None):
x1 = Lambda(lambda z: z / np.array(divide_by_stddev),
output_shape=(img_height, img_width, img_channels),
name='input_stddev_normalization')(x1)
if swap_channels and (img_channels == 3):
x1 = Lambda(lambda z: z[...,::-1],
output_shape=(img_height, img_width, img_channels),
name='input_channel_swap')(x1)
conv1 = Conv2D(32, (5, 5), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv1')(x1)
conv1 = BatchNormalization(axis=3, momentum=0.99, name='bn1')(conv1) # Tensorflow uses filter format [filter_height, filter_width, in_channels, out_channels], hence axis = 3
conv1 = ELU(name='elu1')(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2), name='pool1')(conv1)
conv2 = Conv2D(48, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv2')(pool1)
conv2 = BatchNormalization(axis=3, momentum=0.99, name='bn2')(conv2)
conv2 = ELU(name='elu2')(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2), name='pool2')(conv2)
conv3 = Conv2D(64, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3')(pool2)
conv3 = BatchNormalization(axis=3, momentum=0.99, name='bn3')(conv3)
conv3 = ELU(name='elu3')(conv3)
pool3 = MaxPooling2D(pool_size=(2, 2), name='pool3')(conv3)
conv4 = Conv2D(64, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4')(pool3)
conv4 = BatchNormalization(axis=3, momentum=0.99, name='bn4')(conv4)
conv4 = ELU(name='elu4')(conv4)
pool4 = MaxPooling2D(pool_size=(2, 2), name='pool4')(conv4)
conv5 = Conv2D(48, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5')(pool4)
conv5 = BatchNormalization(axis=3, momentum=0.99, name='bn5')(conv5)
conv5 = ELU(name='elu5')(conv5)
pool5 = MaxPooling2D(pool_size=(2, 2), name='pool5')(conv5)
conv6 = Conv2D(48, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6')(pool5)
conv6 = BatchNormalization(axis=3, momentum=0.99, name='bn6')(conv6)
conv6 = ELU(name='elu6')(conv6)
pool6 = MaxPooling2D(pool_size=(2, 2), name='pool6')(conv6)
conv7 = Conv2D(32, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7')(pool6)
conv7 = BatchNormalization(axis=3, momentum=0.99, name='bn7')(conv7)
conv7 = ELU(name='elu7')(conv7)
classes4 = Conv2D(n_boxes[0] * n_classes, (3, 3), strides=(1, 1), padding="valid", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes4')(conv4)
classes5 = Conv2D(n_boxes[1] * n_classes, (3, 3), strides=(1, 1), padding="valid", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes5')(conv5)
classes6 = Conv2D(n_boxes[2] * n_classes, (3, 3), strides=(1, 1), padding="valid", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes6')(conv6)
classes7 = Conv2D(n_boxes[3] * n_classes, (3, 3), strides=(1, 1), padding="valid", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes7')(conv7)
boxes4 = Conv2D(n_boxes[0] * 4, (3, 3), strides=(1, 1), padding="valid", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes4')(conv4)
boxes5 = Conv2D(n_boxes[1] * 4, (3, 3), strides=(1, 1), padding="valid", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes5')(conv5)
boxes6 = Conv2D(n_boxes[2] * 4, (3, 3), strides=(1, 1), padding="valid", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes6')(conv6)
boxes7 = Conv2D(n_boxes[3] * 4, (3, 3), strides=(1, 1), padding="valid", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes7')(conv7)
anchors4 = AnchorBoxes(img_height, img_width, this_scale=scales[0], next_scale=scales[1], aspect_ratios=aspect_ratios[0],
two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[0], this_offsets=offsets[0],
limit_boxes=limit_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors4')(boxes4)
anchors5 = AnchorBoxes(img_height, img_width, this_scale=scales[1], next_scale=scales[2], aspect_ratios=aspect_ratios[1],
two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[1], this_offsets=offsets[1],
limit_boxes=limit_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors5')(boxes5)
anchors6 = AnchorBoxes(img_height, img_width, this_scale=scales[2], next_scale=scales[3], aspect_ratios=aspect_ratios[2],
two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[2], this_offsets=offsets[2],
limit_boxes=limit_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors6')(boxes6)
anchors7 = AnchorBoxes(img_height, img_width, this_scale=scales[3], next_scale=scales[4], aspect_ratios=aspect_ratios[3],
two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[3], this_offsets=offsets[3],
limit_boxes=limit_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors7')(boxes7)
classes4_reshaped = Reshape((-1, n_classes), name='classes4_reshape')(classes4)
classes5_reshaped = Reshape((-1, n_classes), name='classes5_reshape')(classes5)
classes6_reshaped = Reshape((-1, n_classes), name='classes6_reshape')(classes6)
classes7_reshaped = Reshape((-1, n_classes), name='classes7_reshape')(classes7)
boxes4_reshaped = Reshape((-1, 4), name='boxes4_reshape')(boxes4)
boxes5_reshaped = Reshape((-1, 4), name='boxes5_reshape')(boxes5)
boxes6_reshaped = Reshape((-1, 4), name='boxes6_reshape')(boxes6)
boxes7_reshaped = Reshape((-1, 4), name='boxes7_reshape')(boxes7)
anchors4_reshaped = Reshape((-1, 8), name='anchors4_reshape')(anchors4)
anchors5_reshaped = Reshape((-1, 8), name='anchors5_reshape')(anchors5)
anchors6_reshaped = Reshape((-1, 8), name='anchors6_reshape')(anchors6)
anchors7_reshaped = Reshape((-1, 8), name='anchors7_reshape')(anchors7)
classes_concat = Concatenate(axis=1, name='classes_concat')([classes4_reshaped,
classes5_reshaped,
classes6_reshaped,
classes7_reshaped])
boxes_concat = Concatenate(axis=1, name='boxes_concat')([boxes4_reshaped,
boxes5_reshaped,
boxes6_reshaped,
boxes7_reshaped])
anchors_concat = Concatenate(axis=1, name='anchors_concat')([anchors4_reshaped,
anchors5_reshaped,
anchors6_reshaped,
anchors7_reshaped])
classes_softmax = Activation('softmax', name='classes_softmax')(classes_concat)
predictions = Concatenate(axis=2, name='predictions')([classes_softmax, boxes_concat, anchors_concat])
model = Model(inputs=x, outputs=predictions)
if return_predictor_sizes:
predictor_sizes = np.array([classes4._keras_shape[1:3],
classes5._keras_shape[1:3],
classes6._keras_shape[1:3],
classes7._keras_shape[1:3]])
return model, predictor_sizes
else:
return model
#keras_ssd7 complete
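# Sketch of building SSD7 with the defaults above (illustrative; the input
# size and class count here are arbitrary assumptions): returns the model
# plus the spatial sizes of the four predictor layers.
if __name__ == '__main__':
    model, predictor_sizes = build_model(image_size=(300, 480, 3),
                                         n_classes=5,
                                         return_predictor_sizes=True)
    model.summary()
    print(predictor_sizes)  # spatial (height, width) of classes4...classes7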
#keras_ssd_loss
class SSDLoss:
def __init__(self,
neg_pos_ratio=3,
n_neg_min=0,
alpha=1.0):
self.neg_pos_ratio = neg_pos_ratio
self.n_neg_min = n_neg_min
self.alpha = alpha
def smooth_L1_loss(self, y_true, y_pred):
absolute_loss = tf.abs(y_true - y_pred)
square_loss = 0.5 * (y_true - y_pred)**2
l1_loss = tf.where(tf.less(absolute_loss, 1.0), square_loss, absolute_loss - 0.5)
return tf.reduce_sum(l1_loss, axis=-1)
def log_loss(self, y_true, y_pred):
# Make sure that `y_pred` doesn't contain any zeros (which would break the log function)
y_pred = tf.maximum(y_pred, 1e-15)
# Compute the log loss
log_loss = -tf.reduce_sum(y_true * tf.log(y_pred), axis=-1)