How LSTMs work

Isabella Grandic
May 26 · 6 min read
An RNN is a network that feeds into itself

How LSTMs work

Math time 🤩✨
The structure of an LSTM

Learn Gate

Forget Gate

Remember Gate

Use Gate

U = long-term memory that has not been forgotten
V = Learn gate output

Sentiment Analysis

import torch.nn as nn


class SentimentRNN(nn.Module):
    """The RNN model that will be used to perform sentiment analysis.

    The network is a pipeline of: embedding lookup -> (stacked) LSTM ->
    dropout -> fully-connected layer -> sigmoid.
    """

    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim,
                 n_layers, drop_prob=0.5):
        """Initialize the model by setting up the layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            output_size: Number of output features of the final linear layer.
            embedding_dim: Size of each embedding vector (the LSTM input size).
            hidden_dim: Size of the LSTM hidden state.
            n_layers: Number of stacked LSTM layers.
            drop_prob: Dropout probability passed to ``nn.LSTM`` (applied
                between stacked LSTM layers). The dropout layer applied to the
                LSTM output uses a separate, fixed probability of 0.3.
        """
        super().__init__()
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        # embedding and LSTM layers
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers,
                            dropout=drop_prob, batch_first=True)

        # dropout layer
        self.dropout = nn.Dropout(0.3)

        # linear and sigmoid layers
        self.fc = nn.Linear(hidden_dim, output_size)
        self.sig = nn.Sigmoid()

    def forward(self, x, hidden):
        """Perform a forward pass of our model on some input and hidden state.

        Args:
            x: Batch of token indices with the batch dimension first (the LSTM
                is built with ``batch_first=True``); cast to ``long`` before
                the embedding lookup.
            hidden: ``(h, c)`` tuple of LSTM states, e.g. from ``init_hidden``.

        Returns:
            A ``(sig_out, hidden)`` tuple: ``sig_out`` holds one sigmoid value
            per batch element and ``hidden`` is the updated LSTM state.
        """
        batch_size = x.size(0)

        # embeddings and lstm_out
        x = x.long()
        embeds = self.embedding(x)
        lstm_out, hidden = self.lstm(embeds, hidden)

        # stack up lstm outputs: flatten to (batch * seq_len, hidden_dim)
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)

        # dropout and fully-connected layer
        out = self.dropout(lstm_out)
        out = self.fc(out)

        # sigmoid function
        sig_out = self.sig(out)

        # reshape to be batch_size first
        sig_out = sig_out.view(batch_size, -1)
        # keep only the last value of each row; with output_size == 1 this is
        # the prediction made at the final time step of each sequence
        sig_out = sig_out[:, -1]

        # return last sigmoid output and hidden state
        return sig_out, hidden

    def init_hidden(self, batch_size):
        """Initialize the hidden state.

        Args:
            batch_size: Number of sequences in the batch.

        Returns:
            A ``(h, c)`` tuple of zero tensors, each shaped
            ``(n_layers, batch_size, hidden_dim)``.
        """
        # Borrow dtype and device from an existing parameter so the state
        # tensors live alongside the model's weights.
        weight = next(self.parameters()).data
        hidden = (
            weight.new_zeros(self.n_layers, batch_size, self.hidden_dim),
            weight.new_zeros(self.n_layers, batch_size, self.hidden_dim),
        )

        return hidden

