A mini text-match model plus a small amount of hand-crafted data (TensorFlow 1.x, graph mode): print and inspect the attention values, both to build intuition about attention and to check whether the hand-designed match computation module is actually useful.

import tensorflow as tf

# tf.enable_eager_execution()
batch_size = 2
seq_length = 3
word2id = {}
word2id["love"] = 0
word2id["you"] = 1
word2id["hate"] = 2
word2id["I"] = 3
embedding_dim = 20
seq1 = tf.placeholder(name="seq1",shape=[batch_size,seq_length],dtype=tf.int32)
seq2 = tf.placeholder(name="seq2",shape=[batch_size,seq_length],dtype=tf.int32)
y = tf.placeholder(name="yy",shape=[batch_size],dtype=tf.float32)
word_embedding = tf.get_variable("word_embedding", trainable=True,
                                 shape=[len(word2id),embedding_dim], dtype=tf.float32)
seq1_ = tf.nn.embedding_lookup(word_embedding,seq1)
seq2_ = tf.nn.embedding_lookup(word_embedding,seq2)

def multi_perspective_match(feature_dim, seq1_, seq2_):
    # feature_dim is unused here; kept only for signature compatibility.
    # cosine_norm=False, so each position gets tanh(dot product) rather than a true cosine.
    cosine_value = cosine_distance(seq1_, seq2_, cosine_norm=False)
    cosine_value = tf.reshape(cosine_value, [batch_size, seq_length])
    # Pool the per-position scores into one scalar per example; reduce_sum keeps
    # every position's contribution, while the commented reduce_max alternative
    # would keep only the single strongest match.
    # matching_result = tf.reduce_max(cosine_value, axis=-1)
    matching_result = tf.reduce_sum(cosine_value, axis=-1)
    return matching_result, cosine_value


def cosine_distance(y1, y2, cosine_norm=True, eps=1e-6):
    # y1: [..., a, 1, d], y2: [..., 1, b, d]; broadcasting over the last axis
    # yields a score tensor of shape [..., a, b].
    cosine_numerator = tf.reduce_sum(tf.multiply(y1, y2), axis=-1)
    if not cosine_norm:
        # Unnormalized branch: tanh of the raw dot product (bounded, but not a true cosine).
        return tf.tanh(cosine_numerator)
    y1_norm = tf.sqrt(tf.maximum(tf.reduce_sum(tf.square(y1), axis=-1), eps))
    y2_norm = tf.sqrt(tf.maximum(tf.reduce_sum(tf.square(y2), axis=-1), eps))
    return cosine_numerator / y1_norm / y2_norm
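
# Quick sanity check of the normalized branch with plain NumPy (a sketch added
# for illustration; _a and _np_cos are throwaway names, not part of the model):
# identical vectors must score 1.0, orthogonal ones 0.0.
import numpy as np
_a = np.array([1.0, 2.0, 3.0])
_np_cos = np.sum(_a * _a) / (np.linalg.norm(_a) * np.linalg.norm(_a))
assert abs(_np_cos - 1.0) < 1e-6
assert abs(np.dot([1.0, 0.0], [0.0, 1.0])) < 1e-6  # orthogonal -> 0.0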

def cal_relevancy_matrix(in_question_repres, in_passage_repres):
    in_question_repres_tmp = tf.expand_dims(in_question_repres, 1) # [batch_size, 1, question_len, dim]
    in_passage_repres_tmp = tf.expand_dims(in_passage_repres, 2) # [batch_size, passage_len, 1, dim]
    relevancy_matrix = cosine_distance(in_question_repres_tmp,in_passage_repres_tmp) # [batch_size, passage_len, question_len]
    return relevancy_matrix
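
# Shape walk-through for the broadcast above (with this script's values,
# question_len == passage_len == seq_length):
#   [batch, 1, q_len, dim] * [batch, p_len, 1, dim] -> [batch, p_len, q_len, dim]
#   reduce_sum over the last axis                   -> [batch, p_len, q_len]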

relevancy_matrix = cal_relevancy_matrix(seq1_, seq2_)

# att_dim = 20
# relevancy_matrix = layer_utils.calcuate_attention(seq1_, seq2_, embedding_dim,
#                                               embedding_dim,
#                                               scope_name="attention",
#                                               att_dim=att_dim,
#                                               remove_diagnoal=False, mask1=None,
#                                               mask2=None, is_training=False,
#                                               dropout_rate=0)
# For each seq2 position, a relevancy-weighted sum of seq1 word vectors:
# [batch, seq_len, seq_len] x [batch, seq_len, dim] -> [batch, seq_len, dim]
att_question_contexts = tf.matmul(relevancy_matrix, seq1_)
attentive_rep, cosine_value = multi_perspective_match(embedding_dim, seq2_, att_question_contexts)

matching_result = attentive_rep

loss = tf.reduce_mean(tf.square(matching_result - y))  # mean squared error against the label

prob = matching_result

predictions = prob

tvars = tf.trainable_variables()

optimizer = tf.train.AdamOptimizer(learning_rate=0.0005)

def compute_gradients(tensor, var_list):
    grads = tf.gradients(tensor, var_list)
    return [grad if grad is not None else tf.zeros_like(var) for var, grad in zip(var_list, grads)]

grads = compute_gradients(loss, tvars)
grads, _ = tf.clip_by_global_norm(grads, 10.0)
train_op = optimizer.apply_gradients(zip(grads, tvars))

initializer = tf.global_variables_initializer()
sess = tf.Session()
sess.run(initializer)

def sentence2ids(sentence):
    result = []
    for word in sentence.split(" "):
        result.append(word2id[word])
    if len(result) < seq_length:
        # Note: padding with id 0 collides with word2id["love"]; harmless here
        # because every toy sentence already has exactly seq_length words.
        result.extend([0] * (seq_length - len(result)))
    return result

import random
def get_feed():
    # Four synthetic cases; the label is 1 when the paired sentences match
    # and 0 when they differ.
    feed_dict = {}
    tmp = random.randint(0, 3)  # inclusive on both ends
    if tmp==0:
        feed_dict[seq1] = [sentence2ids("I love you"),sentence2ids("I hate you")]
        feed_dict[seq2] = [sentence2ids("I hate you"),sentence2ids("I love you")]
        feed_dict[y] = [0,0]
    elif tmp==1:
        feed_dict[seq1] = [sentence2ids("I hate you"),sentence2ids("I love you")]
        feed_dict[seq2] = [sentence2ids("I hate you"),sentence2ids("I love you")]
        feed_dict[y] = [1,1]
    elif tmp==2:
        feed_dict[seq1] = [sentence2ids("I love you"),sentence2ids("I love you")]
        feed_dict[seq2] = [sentence2ids("I hate you"),sentence2ids("I love you")]
        feed_dict[y] = [0,1]
    elif tmp==3:
        feed_dict[seq1] = [sentence2ids("I hate you"),sentence2ids("I love you")]
        feed_dict[seq2] = [sentence2ids("I love you"),sentence2ids("I love you")]
        feed_dict[y] = [0,1]
    return feed_dict,tmp

loss100 = 0
for epoch in range(5000):
    feed_dict, random_num = get_feed()
    _, loss_value, prob_, y_, atten_scores_, cosine_value_ = sess.run(
        [train_op, loss, prob, y, relevancy_matrix, cosine_value], feed_dict=feed_dict)
    loss100 += loss_value
    if epoch % 100 == 99:
        print(loss100)  # summed loss over the last 100 steps
        loss100 = 0

print("---")
print("rand num"+str(random_num))
for i in range(len(prob_)):
    print(prob_[i])
    print(y_[i])
    print(atten_scores_[i])
    print(cosine_value_[i])
    print("-")

# Probe: feed word-order-shuffled sentences the model never saw during
# training and see how the match scores and attention values react.
feed_dict = {}
feed_dict[seq1] = [sentence2ids("love I you"), sentence2ids("I you hate")]
feed_dict[seq2] = [sentence2ids("hate I you"), sentence2ids("I you love")]
feed_dict[y] = [0, 0]
prob_, y_, atten_scores_, cosine_value_ = sess.run([prob, y,
                                                     relevancy_matrix,cosine_value], feed_dict=feed_dict)
print("~~~")
for i in range(len(prob_)):
    print(prob_[i])
    print(y_[i])
    print(atten_scores_[i])
    print(cosine_value_[i])
    print("-")

Conclusion:
relevancy_matrix does reflect the match score between every pair of words,
and cosine_value here reveals which word-word match contributes most to the final match result.
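
To pull that dominant word-word match out of the matrix programmatically, something like the following works (a minimal sketch, assuming NumPy and the atten_scores_/feed_dict values left over from the last sess.run above; id2word is an illustrative helper, not part of the original script):

import numpy as np
id2word = {v: k for k, v in word2id.items()}
for b in range(batch_size):
    # Row index = position in seq2 (passage), column index = position in seq1 (question).
    p, q = np.unravel_index(np.argmax(atten_scores_[b]), atten_scores_[b].shape)
    print("example %d: strongest match = %s (seq2) vs %s (seq1)"
          % (b, id2word[feed_dict[seq2][b][p]], id2word[feed_dict[seq1][b][q]]))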
