from math import sqrt
import tensorflow as tf
from fathom.nn import default_runstep
from fathom.imagenet import imagenet
class VGG(imagenet.ImagenetModel):
"""VGG Network."""
  def build_hyperparameters(self):  # set the model's training hyperparameters
    # TODO: put these into runstep options or somewhere else
    # Parameters
    self.learning_rate = 0.0001  # step size for the optimizer
    self.training_iters = 200000  # total number of training iterations
    self.batch_size = 8  # examples per training batch
if self.init_options:
self.batch_size = self.init_options.get('batch_size', self.batch_size)
self.display_step = 1
if not self.forward_only:
self.dropout = 0.8 # Dropout, probability to keep units
else:
self.dropout = 1.
self.keep_prob = tf.placeholder(tf.float32) # dropout (keep probability)
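  # Shape sketch (assuming the standard 224x224x3 ImageNet input noted in
  # build_inference below): each 2x2/stride-2 max-pool halves the spatial dims,
  #   224x224x3 -> 112x112x64 -> 56x56x128 -> 28x28x256
  #   -> 14x14x512 -> 7x7x512 -> flatten to 7*7*512 = 25088
  #   -> fc6 4096 -> fc7 4096 -> fc8 n_classes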
  def build_inference(self, images):  # build the forward (inference) graph
    with self.G.as_default():
      # fix dimensions
      input_shape = images.get_shape().as_list()  # static input shape as a list
      if len(input_shape) == 2:  # flat (batch, pixels) input, e.g. MNIST vectors
        ndim = int(sqrt(input_shape[1]))  # side length of a square image
        if ndim * ndim != input_shape[1]:  # pixel count must be a perfect square
          raise ValueError('input_shape should be square')
        images = tf.reshape(images, [-1, ndim, ndim, 1])  # to NHWC with a single channel
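      # Worked example (assumption: MNIST-style input): a [batch, 784] tensor
      # gives ndim = sqrt(784) = 28 and is reshaped to [-1, 28, 28, 1] before
      # entering the convolutional stack.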
# assume images shape is 224x224x3
# block 1 -- outputs 112x112x64
conv1_1 = conv_op(images, name="conv1_1", kh=3, kw=3, n_out=64, dh=1, dw=1)
conv1_2 = conv_op(conv1_1, name="conv1_2", kh=3, kw=3, n_out=64, dh=1, dw=1)
      pool1 = mpool_op(conv1_2, name="pool1", kh=2, kw=2, dw=2, dh=2)  # 2x2/stride-2 max-pool ends block 1
# block 2 -- outputs 56x56x128
conv2_1 = conv_op(pool1, name="conv2_1", kh=3, kw=3, n_out=128, dh=1, dw=1)
conv2_2 = conv_op(conv2_1, name="conv2_2", kh=3, kw=3, n_out=128, dh=1, dw=1)
      pool2 = mpool_op(conv2_2, name="pool2", kh=2, kw=2, dh=2, dw=2)  # 2x2/stride-2 max-pool ends block 2
# TODO: VGG pooling in later layers is too aggressive for MNIST
using_imagenet = True
if using_imagenet:
# block 3 -- outputs 28x28x256
conv3_1 = conv_op(pool2, name="conv3_1", kh=3, kw=3, n_out=256, dh=1, dw=1)
conv3_2 = conv_op(conv3_1, name="conv3_2", kh=3, kw=3, n_out=256, dh=1, dw=1)
        pool3 = mpool_op(conv3_2, name="pool3", kh=2, kw=2, dh=2, dw=2)  # 2x2/stride-2 max-pool ends block 3
# block 4 -- outputs 14x14x512
conv4_1 = conv_op(pool3, name="conv4_1", kh=3, kw=3, n_out=512, dh=1, dw=1)
conv4_2 = conv_op(conv4_1, name="conv4_2", kh=3, kw=3, n_out=512, dh=1, dw=1)
        conv4_3 = conv_op(conv4_2, name="conv4_3", kh=3, kw=3, n_out=512, dh=1, dw=1)
        pool4 = mpool_op(conv4_3, name="pool4", kh=2, kw=2, dh=2, dw=2)  # 2x2/stride-2 max-pool ends block 4
# block 5 -- outputs 7x7x512
conv5_1 = conv_op(pool4, name="conv5_1", kh=3, kw=3, n_out=512, dh=1, dw=1)
conv5_2 = conv_op(conv5_1, name="conv5_2", kh=3, kw=3, n_out=512, dh=1, dw=1)
conv5_3 = conv_op(conv5_2, name="conv5_3", kh=3, kw=3, n_out=512, dh=1, dw=1)
        pool5 = mpool_op(conv5_3, name="pool5", kh=2, kw=2, dw=2, dh=2)  # 2x2/stride-2 max-pool ends block 5
# flatten
shp = pool5.get_shape().as_list() # pool2 if shrunk
flattened_shape = shp[1] * shp[2] * shp[3]
      resh1 = tf.reshape(pool5, [self.batch_size, flattened_shape], name="resh1")  # flatten each example's feature map into a vector
# fully connected
fc6 = fc_op(resh1, name="fc6", n_out=4096)
fc6_drop = tf.nn.dropout(fc6, self.dropout, name="fc6_drop")
fc7 = fc_op(fc6_drop, name="fc7", n_out=4096)
fc7_drop = tf.nn.dropout(fc7, self.dropout, name="fc7_drop")
fc8 = fc_op(fc7_drop, name="fc8", n_out=self.n_classes)
self.logits = fc8
return self.logits
# crudely based on https://github.com/huyng/tensorflow-vgg
# TODO: refactor these utility functions across convnet models to remove dependencies
def conv_op(input_op, name, kw, kh, n_out, dw, dh):
n_in = input_op.get_shape()[-1].value
with tf.name_scope(name) as scope:
kernel_init_val = tf.truncated_normal([kh, kw, n_in, n_out], dtype=tf.float32, stddev=0.1)
kernel = tf.Variable(kernel_init_val, trainable=True, name='w')
conv = tf.nn.conv2d(input_op, kernel, (1, dh, dw, 1), padding='SAME')
bias_init_val = tf.constant(0.0, shape=[n_out], dtype=tf.float32)
biases = tf.Variable(bias_init_val, trainable=True, name='b')
    z = tf.nn.bias_add(conv, biases)
activation = tf.nn.relu(z, name=scope)
return activation
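# Usage sketch (hypothetical tensor): with SAME padding and stride 1, a 3x3
# convolution preserves spatial dims, so
#   conv_op(x, name="c", kh=3, kw=3, n_out=64, dh=1, dw=1)
# maps a [N, 224, 224, 3] input to [N, 224, 224, 64].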
def fc_op(input_op, name, n_out):
n_in = input_op.get_shape()[-1].value
with tf.name_scope(name):
kernel = tf.Variable(tf.truncated_normal([n_in, n_out], dtype=tf.float32, stddev=0.1), name='w')
biases = tf.Variable(tf.constant(0.0, shape=[n_out], dtype=tf.float32), name='b')
activation = tf.nn.relu_layer(input_op, kernel, biases, name=name)
return activation
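# Note: tf.nn.relu_layer fuses the affine transform and activation,
# i.e. it computes relu(matmul(input_op, kernel) + biases).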
def mpool_op(input_op, name, kh, kw, dh, dw):
return tf.nn.max_pool(input_op,
ksize=[1, kh, kw, 1],
strides=[1, dh, dw, 1],
padding='VALID',
name=name)
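# With kh = kw = dh = dw = 2 and VALID padding, the output side length is
# floor((h - 2) / 2) + 1 = h // 2 for even h, which produces the
# 224 -> 112 -> 56 -> 28 -> 14 -> 7 progression noted above.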
class VGGFwd(VGG):
forward_only = True
if __name__ == "__main__":
m = VGG()
m.setup()
m.run(runstep=default_runstep, n_steps=10)
m.teardown()
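# A minimal inference-only sketch (assumes the same Fathom
# setup/run/teardown lifecycle as the training run above):
#   m = VGGFwd()
#   m.setup()
#   m.run(runstep=default_runstep, n_steps=10)
#   m.teardown()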