import tensorflow as tf
from utils import *
import tensorflow.contrib.slim as slim
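

# --- Hedged fallback (assumption): the wildcard import above is expected to
# provide compute_cosine_distance, comU1 and comU2 from utils.py. The minimal
# sketches below only follow what the docstrings in this file describe
# (comU2 -> (batch_size, 2): cosine and L2 distance; comU1 -> (batch_size, 3));
# the L1 term in comU1 is an assumption. They are bound only when utils does
# not already define them, so the real implementations always take precedence.
def _l2_norm(x):
    # Euclidean norm along the feature axis, kept as [batch_size, 1]
    return tf.sqrt(tf.reduce_sum(tf.square(x), axis=1, keep_dims=True))

if 'compute_cosine_distance' not in globals():
    def compute_cosine_distance(x, y):
        # x, y: [batch_size, dim] -> cosine distance, [batch_size, 1]
        cos = tf.reduce_sum(x * y, axis=1, keep_dims=True) / (_l2_norm(x) * _l2_norm(y) + 1e-8)
        return 1.0 - cos

if 'comU2' not in globals():
    def comU2(x, y):
        # [batch_size, 2]: cosine distance and L2 (Euclidean) distance
        return tf.concat([compute_cosine_distance(x, y), _l2_norm(x - y)], 1)

if 'comU1' not in globals():
    def comU1(x, y):
        # [batch_size, 3]: cosine, L2 and L1 distance (the L1 term is an assumption)
        l1 = tf.reduce_sum(tf.abs(x - y), axis=1, keep_dims=True)
        return tf.concat([comU2(x, y), l1], 1)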

def init_weight(shape, name):
    # truncated-normal weight initializer used for all convolution filters
    var = tf.Variable(tf.truncated_normal(shape, mean=0.0, stddev=1.0), name=name)
    return var


class MPCNN_Layer():
    def __init__(self, num_classes, embedding_size, filter_sizes, num_filters, n_hidden,
                 input_x1, input_x2, input_y, dropout_keep_prob, l2_reg_lambda):
        '''
        :param num_classes: number of output classes
        :param embedding_size: dimensionality of the word embeddings
        :param filter_sizes: convolution window sizes, e.g. [1, 2, 100]
        :param num_filters: [num_filters_A, num_filters_B], the filter counts
            for block A and block B
        :param n_hidden: size of the fully connected hidden layer
        '''
        self.embedding_size = embedding_size
        self.filter_sizes = filter_sizes
        self.num_filters = num_filters
        self.num_classes = num_classes
        self.poolings = [tf.reduce_max, tf.reduce_min, tf.reduce_mean]
        self.input_x1 = input_x1
        self.input_x2 = input_x2
        self.input_y = input_y
        self.dropout_keep_prob = dropout_keep_prob
        self.l2_loss = tf.constant(0.0)
        self.l2_reg_lambda = l2_reg_lambda
        self.W1 = [init_weight([filter_sizes[0], embedding_size, 1, num_filters[0]], "W1_0"),
                   init_weight([filter_sizes[1], embedding_size, 1, num_filters[0]], "W1_1"),
                   init_weight([filter_sizes[2], embedding_size, 1, num_filters[0]], "W1_2")]
        # note: tf.Variable's second positional argument is `trainable`, so the
        # names must be passed as keyword arguments
        self.b1 = [tf.Variable(tf.constant(0.1, shape=[num_filters[0]]), name="b1_0"),
                   tf.Variable(tf.constant(0.1, shape=[num_filters[0]]), name="b1_1"),
                   tf.Variable(tf.constant(0.1, shape=[num_filters[0]]), name="b1_2")]
        self.W2 = [init_weight([filter_sizes[0], embedding_size, 1, num_filters[1]], "W2_0"),
                   init_weight([filter_sizes[1], embedding_size, 1, num_filters[1]], "W2_1")]
        self.b2 = [tf.Variable(tf.constant(0.1, shape=[num_filters[1], embedding_size]), name="b2_0"),
                   tf.Variable(tf.constant(0.1, shape=[num_filters[1], embedding_size]), name="b2_1")]
        # total width of the concatenated similarity feature vector produced by
        # similarity_sentence_layer (fea_h + fea_a + fea_b)
        self.h = num_filters[0]*len(self.poolings)*2 + \
            num_filters[1]*(len(self.poolings)-1)*(len(filter_sizes)-1)*3 + \
            len(self.poolings)*len(filter_sizes)*len(filter_sizes)*3
        self.Wh = tf.Variable(tf.random_normal([self.h, n_hidden], stddev=0.01), name='Wh')
        self.bh = tf.Variable(tf.constant(0.1, shape=[n_hidden]), name="bh")
        self.Wo = tf.Variable(tf.random_normal([n_hidden, num_classes], stddev=0.01), name='Wo')
        self.bo = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="bo")

    def attention(self):
        sent1_unstack = tf.unstack(self.input_x1, axis=1)
        sent2_unstack = tf.unstack(self.input_x2, axis=1)
        D = []
        for i in range(len(sent1_unstack)):
            d = []
            for j in range(len(sent2_unstack)):
                # dis: [batch_size, 1 (channels)]
                dis = compute_cosine_distance(sent1_unstack[i], sent2_unstack[j])
                d.append(dis)
            D.append(d)
        D = tf.reshape(D, [-1, len(sent1_unstack), len(sent2_unstack), 1])
        A = [tf.nn.softmax(tf.expand_dims(tf.reduce_sum(D, axis=i), 2)) for i in [2, 1]]
        atten_embed = []
        atten_embed.append(tf.concat([self.input_x1, A[0] * self.input_x1], 2))
        atten_embed.append(tf.concat([self.input_x2, A[1] * self.input_x2], 2))
        return atten_embed

    def per_dim_conv_layer(self, x, w, b, pooling):
        '''
        Per-dimension convolution (block B): unstack the input, the weights and
        the biases along the embed_size dimension, so that every embedding
        dimension gets its own 1-D convolution.
        embed_size sits at axis 2 of x, axis 1 of w and axis 1 of b.
        :param x: [batch_size, sentence_length, embed_size, 1]
        :param w: [ws, embedding_size, 1, num_filters]
        :param b: [num_filters, embedding_size]
        :param pooling: one of tf.reduce_max, tf.reduce_min, tf.reduce_mean
        :return: [batch_size, embed_size, num_filters]
        '''
        # unstack the input along the embed_size dimension
        input_unstack = tf.unstack(x, axis=2)
        w_unstack = tf.unstack(w, axis=1)
        b_unstack = tf.unstack(b, axis=1)
        convs = []
        for i in range(len(input_unstack)):
            conv = tf.nn.conv1d(input_unstack[i], w_unstack[i], stride=1, padding="VALID")
            # the bias b_unstack[i] is effectively absorbed by batch_norm's beta
            conv = slim.batch_norm(inputs=conv, activation_fn=tf.nn.tanh, is_training=self.is_training)
            # conv: [batch_size, sentence_length-ws+1, num_filters_B]
            convs.append(conv)
        conv = tf.stack(convs, axis=2)  # [batch_size, sentence_length-ws+1, embed_size, num_filters_B]
        pool = pooling(conv, axis=1)  # [batch_size, embed_size, num_filters_B]
        return pool

    def build_block_A(self, x):
        '''
        Build block A and compute the similarity according to algorithm 1.
        :param pooling: one of tf.reduce_max, tf.reduce_min, tf.reduce_mean,
            applied as pooling(conv, axis=1), where axis selects the dimension
            to pool over
        :param filter_size: e.g. [1, 2, 100]
        :return: a nested list; the outer list has length 3 (the three pooling
            types), each inner list has length 3 (the three filter sizes), and
            every element is a [batch_size, 1, num_filters_A] tensor
        '''
        out = []
        with tf.name_scope("build_block_A"):
            for pooling in self.poolings:
                pools = []
                for i, ws in enumerate(self.filter_sizes):
                    with tf.name_scope("conv-pool-%s" % ws):
                        conv = tf.nn.conv2d(x, self.W1[i], strides=[1, 1, 1, 1], padding="VALID")
                        conv = slim.batch_norm(inputs=conv, activation_fn=tf.nn.tanh, is_training=self.is_training)
                        pool = pooling(conv, axis=1)
                        pools.append(pool)
                out.append(pools)
        return out

    def build_block_B(self, x):
        '''
        :param x: [batch_size, sentence_length, embed_size, 1]
        :return: a nested list; the outer list has length 2 (two pooling types),
            each inner list has length 2 (two filter sizes), and every element
            is a [batch_size, 50 (= embed_size), num_filters_B] tensor
        '''
        out = []
        with tf.name_scope("build_block_B"):
            for pooling in self.poolings[:-1]:
                pools = []
                with tf.name_scope("conv-pool"):
                    for i, ws in enumerate(self.filter_sizes[:-1]):
                        with tf.name_scope("per_conv-pool-%s" % ws):
                            pool = self.per_dim_conv_layer(x, self.W2[i], self.b2[i], pooling)
                            pools.append(pool)
                out.append(pools)
        return out

    def similarity_sentence_layer(self):
        '''
        sent1 and sent2 are nested 3x3 lists (three pooling types, three filter
        sizes) whose elements are [batch_size, 1, num_filters_A] tensors.
        num_filters holds the filter counts for the two filter types, i.e.
        filter_A and filter_B.
        comU2 computes the distance between two tensors and returns a
        (batch_size, 2) tensor: the cosine distance and the L2 distance.
        filter_sizes: [1, 2, 100]
        :return: the concatenated similarity feature vector, [batch_size, self.h]
        '''
        # atten = self.attention()  # [batch_size, length, 2*embedding, 1]
        sent1 = self.build_block_A(self.input_x1)
        sent2 = self.build_block_A(self.input_x2)
        fea_h = []
        with tf.name_scope("cal_dis_with_alg1"):
            # for each pooling type, concatenate the outputs of the three filter sizes
            for i in range(3):
                regM1 = tf.concat(sent1[i], 1)
                regM2 = tf.concat(sent2[i], 1)
                # then compare the pooled values filter by filter (algorithm 1)
                for k in range(self.num_filters[0]):
                    fea_h.append(comU2(regM1[:, :, k], regM2[:, :, k]))
        # self.fea_h = fea_h
        fea_a = []
        with tf.name_scope("cal_dis_with_alg2_2-9"):
            for i in range(3):
                # compare every pair of filter sizes, e.g. (1, 2), (1, 100), (2, 1), (2, 100), ...
                for j in range(len(self.filter_sizes)):
                    for k in range(len(self.filter_sizes)):
                        fea_a.append(comU1(sent1[i][j][:, 0, :], sent2[i][k][:, 0, :]))
        # sent1 and sent2 are now nested 2x2 lists of [batch_size, 50, num_filters_B] tensors
        sent1 = self.build_block_B(self.input_x1)
        sent2 = self.build_block_B(self.input_x2)
        fea_b = []
        with tf.name_scope("cal_dis_with_alg2_last"):
            for i in range(len(self.poolings)-1):
                for j in range(len(self.filter_sizes)-1):
                    for k in range(self.num_filters[1]):
                        fea_b.append(comU1(sent1[i][j][:, :, k], sent2[i][j][:, :, k]))
        # self.fea_b = fea_b
        # fea_h: 3*num_filters_A elements (e.g. 3*20 = 60), each a (batch_size, 2) tensor
        # fea_b: 2*2*num_filters_B elements (e.g. 80), each a (batch_size, 3) tensor
        # concatenating fea_h, fea_a and fea_b along axis 1 gives a [batch_size, self.h] tensor
        return tf.concat(fea_h + fea_a + fea_b, 1)
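
    # Worked width check (assuming filter_sizes=[1, 2, 100] and
    # num_filters=[20, 20], the example sizes quoted in the comments above):
    #   fea_h: 3 poolings * 20 filters -> 60 tensors of width 2 -> 120
    #   fea_a: 3 poolings * 3 * 3 filter-size pairs -> 27 tensors of width 3 -> 81
    #   fea_b: 2 poolings * 2 windows * 20 filters -> 80 tensors of width 3 -> 240
    #   total: 120 + 81 + 240 = 441 = self.h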

    def similarity_measure_layer(self, is_training=True):
        self.is_training = is_training
        fea = self.similarity_sentence_layer()
        self.h_drop = tf.nn.dropout(fea, self.dropout_keep_prob)
        with tf.name_scope("full_connect_layer"):
            # feed the dropped-out similarity features through the hidden layer
            h = tf.nn.tanh(tf.matmul(self.h_drop, self.Wh) + self.bh)
            self.scores = tf.matmul(h, self.Wo) + self.bo
            self.output = tf.nn.softmax(self.scores)
        # L2-regularized squared-error loss between the one-hot labels and the softmax output
        reg = tf.contrib.layers.apply_regularization(
            tf.contrib.layers.l2_regularizer(self.l2_reg_lambda), tf.trainable_variables())
        with tf.name_scope("loss"):
            self.loss = tf.reduce_sum(tf.square(tf.subtract(self.input_y, self.output))) + reg
            # alternative: cross-entropy on the logits
            # self.loss = tf.reduce_mean(
            #     tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y))
        with tf.name_scope("accuracy"):
            self.accuracy = tf.reduce_mean(
                tf.cast(tf.equal(tf.argmax(self.input_y, 1), tf.argmax(self.scores, 1)), tf.float32))
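

# --- Minimal usage sketch (assumption): the shapes and hyperparameters below
# are illustrative, not the repo's actual training configuration. Inputs are
# 4-D [batch, sentence_length, embedding_size, 1] tensors, matching the
# conv2d calls in build_block_A; filter_sizes follows the [1, 2, 100] example
# from the docstrings, with the last window spanning the whole sentence.
if __name__ == "__main__":
    seq_len, embed, n_classes = 100, 50, 2
    x1 = tf.placeholder(tf.float32, [None, seq_len, embed, 1], name="x1")
    x2 = tf.placeholder(tf.float32, [None, seq_len, embed, 1], name="x2")
    y = tf.placeholder(tf.float32, [None, n_classes], name="y")
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")
    model = MPCNN_Layer(num_classes=n_classes, embedding_size=embed,
                        filter_sizes=[1, 2, seq_len], num_filters=[20, 20],
                        n_hidden=150, input_x1=x1, input_x2=x2, input_y=y,
                        dropout_keep_prob=keep_prob, l2_reg_lambda=1e-4)
    model.similarity_measure_layer(is_training=True)
    print(model.scores, model.loss, model.accuracy)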