Exercises
- Get better results with a bigger and/or better shaped network
- Add more linear layers
- Try the nn.LSTM and nn.GRU layers
- Combine multiple of these RNNs as a higher level network
本文將展示此練習的結果分析及相關代碼。
本次的練習是PyTorch tutorial中,關於名字->國家分類的事例程序。爲了測試模型的效果,我根據原文的數據展示,編寫了隨機選擇10000次名字輸出正確率的文件evaluting。原代碼雖然模型簡單,但是效果比較好,在evaluting下可以達到60%的正確率(hidden size=32)。爲了加快訓練,同時公平地對各模型都不進行調優,本文比較各模型在hidden size=32下的效果。
原始模型(正確率:55.35%):
class RNN(nn.Module):
    """Single-step Elman-style RNN cell for character-level classification.

    Each call consumes one one-hot character tensor of shape (1, input_size)
    plus the previous hidden state (1, hidden_size), and returns
    (log-probabilities over output_size classes, next hidden state).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        # Both projections read the concatenated (input ++ hidden) vector:
        # one produces the next hidden state, the other the class scores.
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        # dim=1 normalizes across classes of the (1, output_size) row.
        # The original `nn.LogSoftmax()` relied on an implicit dim, which is
        # deprecated and raises on modern PyTorch.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        # Fresh all-zero hidden state for the start of a new name.
        return Variable(torch.zeros(1, self.hidden_size))
加一層線性層在輸入層(正確率:56.90%):
class RNNMoreLinear(nn.Module):
    """RNN cell with one extra linear layer applied to the input.

    Identical to the baseline cell except the raw one-hot input is first
    passed through an input_size -> input_size linear transform before being
    concatenated with the hidden state.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(RNNMoreLinear, self).__init__()
        self.hidden_size = hidden_size
        # Extra learned transform on the raw input (same dimensionality).
        self.input_linear = nn.Linear(input_size, input_size)
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        # dim=1: log-softmax across classes; the implicit-dim form is
        # deprecated and raises on modern PyTorch.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        hidden_i = self.input_linear(input)
        combined = torch.cat((hidden_i, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        # Fresh all-zero hidden state for the start of a new name.
        return Variable(torch.zeros(1, self.hidden_size))
使用GRU(正確率:54.96%):
class RNN_GRU(nn.Module):
    """Two-layer GRU variant fed one character per forward call.

    Input of shape (1, input_size) is reshaped to (seq_len=1, batch=1, input_size)
    for nn.GRU; the hidden state has shape (num_layers, 1, hidden_size).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(RNN_GRU, self).__init__()
        self.hidden_size = hidden_size
        self.gru_layers = 2
        self.gru = nn.GRU(input_size, hidden_size, self.gru_layers)
        self.i2o = nn.Linear(hidden_size, output_size)
        # dim=1: log-softmax over classes of the (1, output_size) row;
        # the implicit-dim form is deprecated and raises on modern PyTorch.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        # view(1, 1, -1): (seq_len, batch, features) as nn.GRU expects.
        output, hidden = self.gru(input.view(1, 1, -1), hidden)
        output = self.softmax(self.i2o(output.view(1, -1)))
        return output, hidden

    def initHidden(self):
        # One zero state per GRU layer.
        return Variable(torch.zeros(self.gru_layers, 1, self.hidden_size))
對原始模型層疊累加(正確率: 47.69%):
# NOTE(review): the surrounding text describes a *stacked vanilla-RNN* model
# here, but this listing is byte-identical to the GRU section above — it looks
# like a copy-paste slip in the original post; confirm against the author's repo.
class RNN_GRU(nn.Module):
    """Two-layer GRU variant fed one character per forward call."""

    def __init__(self, input_size, hidden_size, output_size):
        super(RNN_GRU, self).__init__()
        self.hidden_size = hidden_size
        self.gru_layers = 2
        self.gru = nn.GRU(input_size, hidden_size, self.gru_layers)
        self.i2o = nn.Linear(hidden_size, output_size)
        # dim=1: log-softmax over classes; the implicit-dim form is
        # deprecated and raises on modern PyTorch.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        # view(1, 1, -1): (seq_len, batch, features) as nn.GRU expects.
        output, hidden = self.gru(input.view(1, 1, -1), hidden)
        output = self.softmax(self.i2o(output.view(1, -1)))
        return output, hidden

    def initHidden(self):
        # One zero state per GRU layer.
        return Variable(torch.zeros(self.gru_layers, 1, self.hidden_size))
從最終結果來看,此處列舉的幾個結構效果都沒有原文中參數下的60%正確率效果好。在hidden size=32下,原始網絡的結果比較高,加一個線性層對結果有改善;而採用gru和多層RNN的效果均有所下降。其中,gru結果下降可能是參數設置問題,多層RNN結果下降可能是由於原RNN的輸出經過softmax,疊加時特徵提取效果不佳。
最後,附上測試代碼(根據原文修改):
import torch
from torch.autograd import Variable
from data import *
import random
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
# Checkpoint produced by the training script; change this name to evaluate a
# different model variant.
model_save_name = 'char-rnn-classification-MultiRNN.pt'
# Loads the whole pickled model object (trusted local file only — torch.load
# unpickles arbitrary code, so never point this at untrusted input).
rnn = torch.load(model_save_name)
def categoryFromOutput(output):
    """Map a (1, n_categories) log-probability row to (category name, index).

    Returns the highest-scoring category string from `all_categories` and its
    integer index.
    """
    top_n, top_i = output.data.topk(1)  # Tensor out of Variable with .data
    # int(...) yields a plain Python int whether top_i[0][0] is an old-style
    # int (PyTorch <= 0.3) or a 0-dim tensor (PyTorch >= 0.4).
    category_i = int(top_i[0][0])
    return all_categories[category_i], category_i
def randomChoice(l):
    """Return a uniformly random element of non-empty sequence `l`."""
    # stdlib random.choice replaces the hand-rolled l[random.randint(...)].
    return random.choice(l)
def randomTrainingPair():
    """Sample a random (category, name) pair with ready-to-use tensors.

    Returns (category string, name string, LongTensor class target,
    one-hot character tensor for the name).
    """
    cat = randomChoice(all_categories)
    name = randomChoice(category_lines[cat])
    target = Variable(torch.LongTensor([all_categories.index(cat)]))
    name_tensor = Variable(lineToTensor(name))
    return cat, name, target, name_tensor
def randomTrainingExample():
    """Sample a random labelled example (duplicate of randomTrainingPair).

    Returns (category string, name string, LongTensor class target,
    one-hot character tensor for the name).
    """
    chosen_category = randomChoice(all_categories)
    chosen_line = randomChoice(category_lines[chosen_category])
    label_index = all_categories.index(chosen_category)
    return (
        chosen_category,
        chosen_line,
        Variable(torch.LongTensor([label_index])),
        Variable(lineToTensor(chosen_line)),
    )
# Keep track of correct guesses in a confusion matrix
# Rows = true category, columns = predicted category.
confusion = torch.zeros(n_categories, n_categories)
# Number of random samples to evaluate.
n_confusion = 10000
# Just return an output given a line
def evaluate(line_tensor):
    """Feed every character of `line_tensor` through the model and return
    the output produced after the final character."""
    hidden = rnn.initHidden()
    # Iterating a tensor walks its first dimension, i.e. one character at a time.
    for char_tensor in line_tensor:
        output, hidden = rnn(char_tensor, hidden)
    return output
# Go through a bunch of examples and record which are correctly guessed
for i in range(n_confusion):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output = evaluate(line_tensor)
    guess, guess_i = categoryFromOutput(output)
    category_i = all_categories.index(category)
    # Count (true, predicted) occurrence.
    confusion[category_i][guess_i] += 1
# Accuracy = diagonal hits / total samples.
correct = 0
for i in range(n_categories):
    correct += confusion[i][i]
correct /= n_confusion
print("accuracy of {} random samples: {}".format(n_confusion, correct))
# Normalize by dividing every row by its sum
# (each row then shows per-true-category prediction proportions).
for i in range(n_categories):
    confusion[i] = confusion[i] / confusion[i].sum()
# Set up plot
fig = plt.figure()
ax = fig.add_subplot(111)
cax = ax.matshow(confusion.numpy())
fig.colorbar(cax)
# Set up axes
# Leading '' pads the tick-label list because matshow places the first
# tick before the first cell.
ax.set_xticklabels([''] + all_categories, rotation=90)
ax.set_yticklabels([''] + all_categories)
# Force label at every tick
ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
# sphinx_gallery_thumbnail_number = 2
plt.show()