import random

import tensorflow as tf  # written for TensorFlow 1.x (graph mode, tf.placeholder)
# tf.enable_eager_execution()
batch_size = 2
seq_length = 3
word2id = {"love": 0, "you": 1, "hate": 2, "I": 3}
embedding_dim = 20
seq1 = tf.placeholder(name="seq1", shape=[batch_size, seq_length], dtype=tf.int32)
seq2 = tf.placeholder(name="seq2", shape=[batch_size, seq_length], dtype=tf.int32)
y = tf.placeholder(name="yy", shape=[batch_size], dtype=tf.float32)
word_embedding = tf.get_variable("word_embedding", trainable=True,
                                 shape=[len(word2id), embedding_dim], dtype=tf.float32)
seq1_ = tf.nn.embedding_lookup(word_embedding, seq1)  # [batch_size, seq_length, embedding_dim]
seq2_ = tf.nn.embedding_lookup(word_embedding, seq2)  # [batch_size, seq_length, embedding_dim]
def multi_perspective_match(feature_dim, seq1_, seq2_):
    # feature_dim is unused in this simplified, single-perspective version.
    cosine_value = cosine_distance(seq1_, seq2_, cosine_norm=False)  # [batch_size, seq_length]
    cosine_value = tf.reshape(cosine_value, [batch_size, seq_length])
    matching_result = cosine_value
    # matching_result = tf.reduce_max(matching_result, axis=-1)
    matching_result = tf.reduce_sum(matching_result, axis=-1)  # [batch_size]
    return matching_result, cosine_value
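# Note: this simplified multi_perspective_match just sums the per-position
# tanh(dot-product) scores into one scalar per example, so the returned
# cosine_value exposes how much each position contributes to the final match.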
def cosine_distance(y1, y2, cosine_norm=True, eps=1e-6):
    # y1: [..., a, 1, d]
    # y2: [..., 1, b, d]
    cosine_numerator = tf.reduce_sum(tf.multiply(y1, y2), axis=-1)
    if not cosine_norm:
        # Unnormalized variant: squash the raw dot product with tanh.
        return tf.tanh(cosine_numerator)
    y1_norm = tf.sqrt(tf.maximum(tf.reduce_sum(tf.square(y1), axis=-1), eps))
    y2_norm = tf.sqrt(tf.maximum(tf.reduce_sum(tf.square(y2), axis=-1), eps))
    return cosine_numerator / y1_norm / y2_norm
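# Sanity check for the normalized branch: identical directions score 1.0 and
# orthogonal directions score 0.0, e.g. cos([1, 0], [1, 0]) = 1.0 while
# cos([1, 0], [0, 1]) = 0.0, so each output entry is a word-word similarity.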
def cal_relevancy_matrix(in_question_repres, in_passage_repres):
    in_question_repres_tmp = tf.expand_dims(in_question_repres, 1)  # [batch_size, 1, question_len, dim]
    in_passage_repres_tmp = tf.expand_dims(in_passage_repres, 2)  # [batch_size, passage_len, 1, dim]
    relevancy_matrix = cosine_distance(in_question_repres_tmp, in_passage_repres_tmp)  # [batch_size, passage_len, question_len]
    return relevancy_matrix
relevancy_matrix = cal_relevancy_matrix(seq1_, seq2_)  # [batch_size, seq_length, seq_length]
# Alternative attention-based relevancy (requires layer_utils; identifiers as in that codebase):
# att_dim = 20
# relevancy_matrix = layer_utils.calcuate_attention(
#     seq1_, seq2_, embedding_dim, embedding_dim,
#     scope_name="attention", att_dim=att_dim,
#     remove_diagnoal=False, mask1=None, mask2=None,
#     is_training=False, dropout_rate=0)
# Each position of seq2_ attends to seq1_: row i of att_question_contexts is a
# relevancy-weighted mixture of the seq1_ word vectors.
att_question_contexts = tf.matmul(relevancy_matrix, seq1_)  # [batch_size, seq_length, dim]
attentive_rep, cosine_value = multi_perspective_match(embedding_dim, seq2_, att_question_contexts)
matching_result = attentive_rep
loss = tf.reduce_mean(tf.square(matching_result - y))  # mean squared error against the match label
prob = matching_result
predictions = prob
tvars = tf.trainable_variables()
optimizer = tf.train.AdamOptimizer(learning_rate=0.0005)
def compute_gradients(tensor, var_list):
    grads = tf.gradients(tensor, var_list)
    return [grad if grad is not None else tf.zeros_like(var)
            for var, grad in zip(var_list, grads)]
grads = compute_gradients(loss, tvars)
grads, _ = tf.clip_by_global_norm(grads, 10.0)
train_op = optimizer.apply_gradients(zip(grads, tvars))
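# compute_gradients substitutes zeros for the None gradients of variables the
# loss does not touch, so clip_by_global_norm and apply_gradients above never
# receive None entries.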
initializer = tf.global_variables_initializer()
sess = tf.Session()
sess.run(initializer)
def sentence2ids(sentence):
    result = []
    for word in sentence.split(" "):
        result.append(word2id[word])
    if len(result) < seq_length:
        result.extend([0] * (seq_length - len(result)))
    return result
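# Example: sentence2ids("I love you") -> [3, 0, 1]. Shorter inputs are padded
# with id 0, which is also the id of "love" -- harmless here because every
# sample has exactly seq_length words.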
def get_feed():
    feed_dict = {}
    tmp = random.randint(0, 3)
    if tmp == 0:
        feed_dict[seq1] = [sentence2ids("I love you"), sentence2ids("I hate you")]
        feed_dict[seq2] = [sentence2ids("I hate you"), sentence2ids("I love you")]
        feed_dict[y] = [0, 0]
    elif tmp == 1:
        feed_dict[seq1] = [sentence2ids("I hate you"), sentence2ids("I love you")]
        feed_dict[seq2] = [sentence2ids("I hate you"), sentence2ids("I love you")]
        feed_dict[y] = [1, 1]
    elif tmp == 2:
        feed_dict[seq1] = [sentence2ids("I love you"), sentence2ids("I love you")]
        feed_dict[seq2] = [sentence2ids("I hate you"), sentence2ids("I love you")]
        feed_dict[y] = [0, 1]
    elif tmp == 3:
        feed_dict[seq1] = [sentence2ids("I hate you"), sentence2ids("I love you")]
        feed_dict[seq2] = [sentence2ids("I love you"), sentence2ids("I love you")]
        feed_dict[y] = [0, 1]
    return feed_dict, tmp
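# Label convention: y[i] is 1 when the i-th sentence pair is identical and 0
# when it differs, so the model regresses a binary match score.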
loss100 = 0  # accumulator; resetting it inside the loop would defeat its purpose
for epoch in range(5000):
    feed_dict, random_num = get_feed()
    _, loss_value, prob_, y_, atten_scores_, cosine_value_ = sess.run(
        [train_op, loss, prob, y, relevancy_matrix, cosine_value],
        feed_dict=feed_dict)
    loss100 += loss_value
    if epoch % 100 == 99:
        print(loss100)  # total loss over the last 100 steps
        loss100 = 0
        print("---")
        print("rand num " + str(random_num))
        for i in range(len(prob_)):
            print(prob_[i])
            print(y_[i])
            print(atten_scores_[i])
            print(cosine_value_[i])
            print("-")
# Evaluate on permuted word orders never seen during training.
feed_dict = {}
feed_dict[seq1] = [sentence2ids("love I you"), sentence2ids("I you hate")]
feed_dict[seq2] = [sentence2ids("hate I you"), sentence2ids("I you love")]
feed_dict[y] = [0, 0]
prob_, y_, atten_scores_, cosine_value_ = sess.run(
    [prob, y, relevancy_matrix, cosine_value], feed_dict=feed_dict)
print("~~~")
for i in range(len(prob_)):
print(prob_[i])
print(y_[i])
print(atten_scores_[i])
print(cosine_value_[i])
print("-")
Conclusion:
relevancy_matrix does indeed reflect the matching score between every pair of words,
and the cosine_value here reveals which word-word match contributes most to the final matching result.
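To make this concrete, here is a minimal stand-alone NumPy sketch. The pairwise_cosine helper and the hand-picked 3-d vectors are illustrative assumptions (not the learned embeddings above); it only shows that a relevancy matrix built this way is exactly the grid of word-word cosine scores, with its extreme entry marking the dominant word-word match.

import numpy as np

def pairwise_cosine(a, b, eps=1e-6):
    # a: [a_len, d] question vectors, b: [b_len, d] passage vectors.
    # Returns [b_len, a_len], mirroring cal_relevancy_matrix's layout.
    a_norm = a / np.maximum(np.linalg.norm(a, axis=-1, keepdims=True), eps)
    b_norm = b / np.maximum(np.linalg.norm(b, axis=-1, keepdims=True), eps)
    return b_norm @ a_norm.T

# Hypothetical toy embeddings: love/hate oppose; I, you, love are orthogonal.
toy_emb = {"I": [0.0, 1.0, 0.0], "you": [0.0, 0.0, 1.0],
           "love": [1.0, 0.0, 0.0], "hate": [-1.0, 0.0, 0.0]}
q = np.array([toy_emb[w] for w in "I love you".split()])
p = np.array([toy_emb[w] for w in "I hate you".split()])
print(pairwise_cosine(q, p))
# [[ 1.  0.  0.]
#  [ 0. -1.  0.]
#  [ 0.  0.  1.]]
# The 1.0s on the diagonal are the I<->I and you<->you matches; the -1.0 at
# (1, 1) is the love/hate mismatch that dominates the final match score.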