A Step-by-Step Guide to Implementing Bidirectional LSTM Machine Translation with PyTorch
Contents
- Preface
- 1. Dataset
- 1.1 Download and preprocess the dataset
- 1.2 Split the dataset into source and target
- 1.3 Define the vocabulary class
- 1.4 Build the training set
- 2. Define the model
- 2.1 Import the required packages
- 2.2 Define the Encoder
- 2.3 Define the Decoder
- 2.4 Define the seq2seq model
- 2.5 Define the loss
- 3. Training function
- 4. Prediction function
- 5. Testing
- 5.1 Define the parameters
- 5.2 Training
- 5.3 Prediction
Preface
I've spent the last couple of days learning about RNNs and wanted to try implementing machine translation. If you run into any problems, feel free to reach out!
1. Dataset
The dataset used in this article comes from Mu Li's book Dive into Deep Learning.
Dataset download: https://github.com/codefly-xtl/translation/tree/main/data
First, let's see what the dataset looks like: English on the left, French on the right.
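(The screenshot of the raw file is not reproduced here. Each line of fra.txt is an English sentence, a tab character, and its French translation, so a raw line looks something like "Go.<TAB>Va !".)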
1.1 Download and preprocess the dataset
In this part, we first read the file into raw_data, replace non-breaking spaces with ordinary spaces, convert the text to lowercase, insert a space before punctuation marks, and finally return the processed string.
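For intuition, here is a standalone illustration of the punctuation-spacing step on a made-up line (not taken from the file):

raw = 'go.\tva !'
# Insert a space before , . ! ? unless one is already there
out = [' ' + c if i > 0 and c in set(',.!?') and raw[i - 1] != ' ' else c
       for i, c in enumerate(raw)]
print(''.join(out))  # -> 'go .\tva !'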
def process_data():
    # Return True if the character is punctuation and the previous character is not a space
    def no_space(char, pre_char):
        return char in set(',.!?') and pre_char != ' '

    # Load the raw data
    with open('./data/fra.txt', encoding='utf-8') as f:
        raw_data = f.read()
    # Preprocess: replace non-breaking spaces, lowercase, insert a space before punctuation
    raw_data = raw_data.replace('\u202f', ' ').replace('\xa0', ' ').lower()
    out = [' ' + char if i > 0 and no_space(char, raw_data[i - 1]) else char
           for i, char in enumerate(raw_data)]
    data = ''.join(out)
    return data

1.2 Split the dataset into source and target
In this part, we extract the original and translated sentences: source holds the originals and target holds the translations. We split the data into lines on '\n', split each line into its two parts on '\t', and then split each sentence into a list of words on spaces. For example: source = [['i', 'am', 'person'], ['i', 'like', 'you']].
def get_sentence(data):
    # Lists for the two languages
    source = []
    target = []
    # Iterate over each line
    for line in data.split('\n'):
        # Each line has two tab-separated parts
        parts = line.split('\t')
        if len(parts) == 2:
            # English goes into source
            source.append(parts[0].split(' '))
            # French goes into target
            target.append(parts[1].split(' '))
    # source looks like:
    # source = [['i', 'am', 'person'], ['i', 'like', 'you']]
    return source, target

1.3 Define the vocabulary class
The vocabulary class is built as follows:
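The class definition itself is not included in the post, so here is a minimal sketch of a Vocab class that is consistent with how it is used later: len(vocab) gives the vocabulary size, word_to_index and to_word map between words and indices, <bos> sits at index 2 (the training loop hard-codes that index), and prase turns a raw sentence into a padded id tensor plus its valid length. Everything in this sketch is an assumption, not the author's original code.

import collections

import torch


class Vocab:
    def __init__(self, sentences, min_freq=2):
        # Count word frequencies over all tokenized sentences
        counter = collections.Counter(word for sentence in sentences for word in sentence)
        # Reserved tokens first, so that <pad>=0, <unk>=1, <bos>=2, <eos>=3
        self.index_to_word = ['<pad>', '<unk>', '<bos>', '<eos>']
        self.index_to_word += [w for w, c in counter.items()
                               if c >= min_freq and w not in self.index_to_word]
        self.word_to_index = {w: i for i, w in enumerate(self.index_to_word)}

    def __len__(self):
        return len(self.index_to_word)

    def to_index(self, words):
        # Unknown words map to <unk>
        return [self.word_to_index.get(w, self.word_to_index['<unk>']) for w in words]

    def to_word(self, indices):
        return [self.index_to_word[i] for i in indices]

    def prase(self, sentence, num_steps):
        # The method name mirrors the call in the predict function below.
        # Mirror the preprocessing: put a space before punctuation, then lowercase
        for p in ',.!?':
            sentence = sentence.replace(p, ' ' + p)
        words = sentence.lower().split() + ['<eos>']
        # Truncate/pad to num_steps and record the true (unpadded) length
        valid_len = min(len(words), num_steps)
        ids = self.to_index(words)[:num_steps]
        ids += [self.word_to_index['<pad>']] * (num_steps - len(ids))
        return torch.tensor([ids]), torch.tensor([valid_len])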
1.4 Build the training set
In this part we build the training data iterator. The steps are as follows:
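The helper that does this lives in utils and is not reproduced in the post, so the following is only a sketch inferred from how it is used in section 5.1 and in the training loop: load and preprocess the text, split it into source/target word lists, build one vocabulary per language, convert every sentence into a fixed-length padded id sequence plus its valid length, and wrap everything in a DataLoader that yields (source, source_valid_len, target, target_valid_len). The sketch reuses the Vocab sketch above; all names and details are assumptions.

import torch
from torch.utils.data import DataLoader, TensorDataset


def get_train_iter(batch_size, num_steps):
    # Load and preprocess the raw text, then split it into tokenized source/target sentences
    data = process_data()
    source, target = get_sentence(data)
    # One vocabulary per language (Vocab is the sketch from section 1.3)
    source_vocab = Vocab(source)
    target_vocab = Vocab(target)

    def build_tensor(sentences, vocab):
        # Append <eos>, truncate/pad every sentence to num_steps, and record its true length
        ids, valid_lens = [], []
        for words in sentences:
            words = words + ['<eos>']
            valid_lens.append(min(len(words), num_steps))
            line = vocab.to_index(words)[:num_steps]
            line += [vocab.word_to_index['<pad>']] * (num_steps - len(line))
            ids.append(line)
        return torch.tensor(ids), torch.tensor(valid_lens)

    source_ids, source_valid_len = build_tensor(source, source_vocab)
    target_ids, target_valid_len = build_tensor(target, target_vocab)
    dataset = TensorDataset(source_ids, source_valid_len, target_ids, target_valid_len)
    train_iter = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    return train_iter, source_vocab, target_vocab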
2. Define the model
2.1 Import the required packages
import torch
from torch import nn

import utils

2.2 Define the Encoder
class Encoder(nn.Module):
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers, bidirectional=False):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.LSTM(embed_size, num_hiddens, num_layers, bidirectional=bidirectional)
        self.num_layers = num_layers
        self.num_hiddens = num_hiddens
        self.bidirectional = bidirectional
        if bidirectional:
            # Each layer has two directions, so the two directions have to be merged
            self.linear_hidden = nn.Linear(self.num_hiddens * 2, self.num_hiddens)
            self.linear_content = nn.Linear(self.num_hiddens * 2, self.num_hiddens)

    def forward(self, X):
        X = self.embedding(X)
        X = X.permute(1, 0, 2)
        output, state = self.rnn(X)
        hidden_state, content_state = state
        if self.bidirectional:
            # Concatenate the forward and backward states of each layer, then project them
            # back down so they match the decoder's num_hiddens
            hidden_state = torch.cat([hidden_state[:self.num_layers * 2:2, :, :],
                                      hidden_state[1:self.num_layers * 2 + 1:2, :, :]], dim=2)
            content_state = torch.cat([content_state[:self.num_layers * 2:2, :, :],
                                       content_state[1:self.num_layers * 2 + 1:2, :, :]], dim=2)
            hidden_state = self.linear_hidden(hidden_state)
            content_state = self.linear_content(content_state)
        return hidden_state, content_state

2.3 Define the Decoder
class Decoder(nn.Module):
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.LSTM(embed_size + num_hiddens * 2, num_hiddens, num_layers)
        self.linear = nn.Linear(num_hiddens, vocab_size)

    def init_state(self, encoder_output_state):
        return encoder_output_state

    def forward(self, X, state, predict=False):
        if not predict:
            X = self.embedding(X).permute(1, 0, 2)
            # All of the decoder's information comes from the encoder's final time-step state,
            # so the last layer of that state is especially important. To make full use of it,
            # it is also concatenated onto the decoder input at every time step.
            hidden_state, content_state = state
            new_hidden_state = hidden_state[-1].unsqueeze(0).repeat(X.shape[0], 1, 1)
            new_content_state = content_state[-1].unsqueeze(0).repeat(X.shape[0], 1, 1)
            X = torch.cat([new_hidden_state, new_content_state, X], dim=2)
        # X has shape (num_steps, batch_size, decoder_embed_size + encoder_num_hiddens * 2)
        output, state = self.rnn(X, state)
        output = self.linear(output).permute(1, 0, 2)
        return output, state

2.4 Define the seq2seq model
class EncoderDecoder(nn.Module):
    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target):
        encoder_output_state = self.encoder(source)
        decoder_init_state = self.decoder.init_state(encoder_output_state)
        return self.decoder(target, decoder_init_state)
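As a quick sanity check (not part of the original post), the sketch below pushes a dummy batch through the three modules to confirm that the shapes line up; every size here is made up.

import torch

# Assumes Encoder, Decoder and EncoderDecoder are defined as above
enc = Encoder(vocab_size=100, embed_size=8, num_hiddens=16, num_layers=2, bidirectional=True)
dec = Decoder(vocab_size=120, embed_size=8, num_hiddens=16, num_layers=2)
model = EncoderDecoder(enc, dec)

source = torch.randint(0, 100, (4, 10))   # (batch_size, num_steps)
target = torch.randint(0, 120, (4, 10))   # decoder input during training
output, state = model(source, target)
print(output.shape)  # torch.Size([4, 10, 120]) = (batch_size, num_steps, target_vocab_size)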
2.5 Define the loss
Because the loss matrix has shape (batch_size, num_steps) and the tail of every sentence is padding, the loss on padded positions must not be counted.
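To make the masking concrete, here is a tiny standalone illustration (the lengths are made up): every position at or beyond a sentence's valid length gets weight 0, so padded tokens contribute nothing to the loss.

import torch

valid_len = torch.tensor([2, 3])        # real lengths of two sentences
weights = torch.ones(2, 4)              # (batch_size, num_steps)
mask = torch.arange(4, dtype=torch.float32)[None, :] >= valid_len[:, None]
weights[mask] = 0
print(weights)
# tensor([[1., 1., 0., 0.],
#         [1., 1., 1., 0.]])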
class Myloss(nn.CrossEntropyLoss):
    def value_mask(self, X, valid_len):
        # Positions at or beyond the valid length are padding: set their weight to 0
        mask = torch.arange(X.shape[1], dtype=torch.float32, device=X.device)[None, :] >= valid_len[:, None]
        X[mask] = 0
        return X

    def forward(self, predict, target, valid_len=None):
        # One weight per token: 1 for real tokens, 0 for padding
        weights = torch.ones_like(target)
        weights = self.value_mask(weights, valid_len)
        self.reduction = 'none'
        # predict: (batch_size, num_steps, vocab_size) -> (batch_size, vocab_size, num_steps)
        unweighted_loss = super().forward(predict.permute(0, 2, 1), target)
        weighted_loss = unweighted_loss * weights
        return weighted_loss.mean()

3. Training function
def train(net, data_iter, lr, num_epochs, device):
    net.to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    loss = Myloss()
    net.train()
    for epoch in range(num_epochs):
        for batch in data_iter:
            optimizer.zero_grad()
            # Move the batch to the device
            source, source_valid_len, target, target_valid_len = [x.to(device) for x in batch]
            # Prepend <bos> (index 2) to every target sentence to form the decoder input
            bos = torch.tensor([2] * target.shape[0], device=device).reshape(-1, 1)
            decoder_input = torch.cat([bos, target[:, :-1]], dim=1)
            # Forward pass, masked loss, and parameter update
            Y_hat, _ = net(source, decoder_input)
            l = loss(Y_hat, target, target_valid_len)
            l.backward()
            optimizer.step()
        print(l)

4. Prediction function
def predict(net, source_sentence, source_Vocab, target_Vocab, num_steps, device):
    # Stores the predicted token indices of the translation
    result = []
    # Tensorize the source sentence
    source, source_valid_len = source_Vocab.prase(source_sentence, num_steps)
    source, source_valid_len = source.to(device), source_valid_len.to(device)
    # Run the encoder to get its final state
    state = net.encoder(source)
    # Keep the last layer of the encoder's final state: it is concatenated onto every decoder input
    hidden_state, content_state = state
    new_hidden_state = hidden_state[-1].unsqueeze(0)
    new_content_state = content_state[-1].unsqueeze(0)
    # Initialize the decoder state from the encoder state
    state = net.decoder.init_state(state)
    # Build the first decoder input from <bos>, the same start token used during training
    X = torch.tensor(target_Vocab.word_to_index['<bos>']).reshape(-1, 1).to(device)
    X = net.decoder.embedding(X).permute(1, 0, 2)
    X = torch.cat([new_hidden_state, new_content_state, X], dim=2)
    for i in range(num_steps):
        # Run one decoder step in predict mode
        Y, state = net.decoder(X, state, True)
        X = Y.argmax(dim=2)
        # Index of the most likely word
        pred = X.squeeze(dim=0).type(torch.int32).item()
        # Stop once <eos> is generated
        if pred == target_Vocab.word_to_index['<eos>']:
            break
        X = net.decoder.embedding(X).permute(1, 0, 2)
        X = torch.cat([new_hidden_state, new_content_state, X], dim=2)
        result.append(pred)
    return ' '.join(target_Vocab.to_word(result))

5. Testing
5.1 Define the parameters
batch_size = 64
num_steps = 20
train_iter, source_Vocab, target_Vocab = utils.get_train_iter(batch_size, num_steps)
encoder_embed_size = 300
decoder_embed_size = 300
hidden_size = 64
num_layers = 2
encoder = Encoder(len(source_Vocab), encoder_embed_size, hidden_size, num_layers, True)
decoder = Decoder(len(target_Vocab), decoder_embed_size, hidden_size, num_layers)
net = EncoderDecoder(encoder, decoder)
num_epoch = 100
lr = 0.001
device = 'cuda'

5.2 Training
train(net, train_iter, lr, num_epoch, device)
# Output looks like:
tensor(0.0147, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0137, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0139, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0128, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0126, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0126, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0123, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0120, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0128, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0121, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0117, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0122, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0119, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0124, device='cuda:0', grad_fn=<MeanBackward0>)

5.3 Prediction
predict(net, 'He did it just for fun.', source_Vocab, target_Vocab, num_steps, device)