Deep Learning 2: Part 1 Lesson 5

Hiromi Suenaga
Jan 10, 2018 · 13 min read

Lesson 5

I. Introduction

II. Collaborative Filtering — using MovieLens dataset

ratings = pd.read_csv(path+'ratings.csv')
ratings.head()

Create a subset for Excel

g=ratings.groupby('userId')['rating'].count()
topUsers=g.sort_values(ascending=False)[:15]
g=ratings.groupby('movieId')['rating'].count()
topMovies=g.sort_values(ascending=False)[:15]
top_r = ratings.join(topUsers, rsuffix='_r', how='inner', on='userId')
top_r = top_r.join(topMovies, rsuffix='_r', how='inner', on='movieId')
pd.crosstab(top_r.userId, top_r.movieId, top_r.rating, aggfunc=np.sum)

Simple Python version [26:03]

val_idxs = get_cv_idxs(len(ratings)) 
wd = 2e-4
n_factors = 50
cf = CollabFilterDataset.from_csv(path, 'ratings.csv', 'userId', 'movieId', 'rating')
learn = cf.get_learner(n_factors, val_idxs, 64, opt_fn=optim.Adam)
learn.fit(1e-2, 2, wds=wd, cycle_len=1, cycle_mult=2)
The output is the MSE (mean squared error), so take the square root to get the RMSE, which is what published MovieLens benchmarks use:

math.sqrt(0.765)
preds = learn.predict()
y = learn.data.val_y
sns.jointplot(preds, y, kind='hex', stat_func=None)

Dot product with Python

a = T([[1., 2], [3, 4]])
b = T([[2., 2], [10, 10]])
(a*b).sum(1)
6
70
[torch.FloatTensor of size 2]

Building our first custom layer (i.e. PyTorch module) [33:55]

class DotProduct(nn.Module):
    def forward(self, u, m): return (u*m).sum(1)
model = DotProduct()
model(a,b)
6
70
[torch.FloatTensor of size 2]

Building a more complex module [41:31]

u_uniq = ratings.userId.unique() 
user2idx = {o:i for i,o in enumerate(u_uniq)}
ratings.userId = ratings.userId.apply(lambda x: user2idx[x])
m_uniq = ratings.movieId.unique()
movie2idx = {o:i for i,o in enumerate(m_uniq)}
ratings.movieId = ratings.movieId.apply(lambda x: movie2idx[x])
n_users=int(ratings.userId.nunique())
n_movies=int(ratings.movieId.nunique())
class EmbeddingDot(nn.Module):
    def __init__(self, n_users, n_movies):
        super().__init__()
        self.u = nn.Embedding(n_users, n_factors)
        self.m = nn.Embedding(n_movies, n_factors)
        self.u.weight.data.uniform_(0,0.05)
        self.m.weight.data.uniform_(0,0.05)

    def forward(self, cats, conts):
        users,movies = cats[:,0],cats[:,1]
        u,m = self.u(users),self.m(movies)
        return (u*m).sum(1)
x = ratings.drop(['rating', 'timestamp'],axis=1)
y = ratings['rating'].astype(np.float32)
data = ColumnarModelData.from_data_frame(path, val_idxs, x, y, ['userId', 'movieId'], 64)
wd=1e-5
model = EmbeddingDot(n_users, n_movies).cuda()
opt = optim.SGD(model.parameters(), 1e-1, weight_decay=wd, momentum=0.9)
fit(model, data, 3, opt, F.mse_loss)

Let’s improve our model

min_rating,max_rating = ratings.rating.min(),ratings.rating.max()
min_rating,max_rating
def get_emb(ni,nf):
    e = nn.Embedding(ni, nf)
    e.weight.data.uniform_(-0.01,0.01)
    return e

class EmbeddingDotBias(nn.Module):
    def __init__(self, n_users, n_movies):
        super().__init__()
        (self.u, self.m, self.ub, self.mb) = [get_emb(*o) for o in [
            (n_users, n_factors), (n_movies, n_factors), (n_users,1), (n_movies,1)
        ]]

    def forward(self, cats, conts):
        users,movies = cats[:,0],cats[:,1]
        um = (self.u(users)* self.m(movies)).sum(1)
        res = um + self.ub(users).squeeze() + self.mb(movies).squeeze()
        res = F.sigmoid(res) * (max_rating-min_rating) + min_rating
        return res
wd=2e-4
model = EmbeddingDotBias(cf.n_users, cf.n_items).cuda()
opt = optim.SGD(model.parameters(), 1e-1, weight_decay=wd, momentum=0.9)
fit(model, data, 3, opt, F.mse_loss)
[ 0. 0.85056 0.83742]
[ 1. 0.79628 0.81775]
[ 2. 0.8012 0.80994]

Neural Net Version [1:17:21]

class EmbeddingNet(nn.Module):
    def __init__(self, n_users, n_movies, nh=10, p1=0.5, p2=0.5):
        super().__init__()
        (self.u, self.m) = [get_emb(*o) for o in [
            (n_users, n_factors), (n_movies, n_factors)]]
        self.lin1 = nn.Linear(n_factors*2, nh)
        self.lin2 = nn.Linear(nh, 1)
        self.drop1 = nn.Dropout(p1)
        self.drop2 = nn.Dropout(p2)

    def forward(self, cats, conts):
        users,movies = cats[:,0],cats[:,1]
        x = self.drop1(torch.cat([self.u(users),self.m(movies)], dim=1))
        x = self.drop2(F.relu(self.lin1(x)))
        return F.sigmoid(self.lin2(x)) * (max_rating-min_rating+1) + min_rating-0.5
wd=1e-5
model = EmbeddingNet(n_users, n_movies).cuda()
opt = optim.Adam(model.parameters(), 1e-3, weight_decay=wd)
fit(model, data, 3, opt, F.mse_loss)
[ 0. 0.88043 0.82363]
[ 1. 0.8941 0.81264]
[ 2. 0.86179 0.80706]

What is happening in the training loop? [1:33:21]

opt = optim.SGD(model.parameters(), 1e-1, weight_decay=wd, momentum=0.9)
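Under the hood, fit() runs a loop over mini-batches: a forward pass, a loss computation, backpropagation, and an optimizer step. Here is a simplified sketch of one epoch, assuming each batch from ColumnarModelData yields categorical inputs, continuous inputs, and the target (callbacks, metric tracking, and validation are omitted):

def train_epoch(model, data, opt, crit):
    model.train()
    for cats, conts, y in data.trn_dl:
        preds = model(cats, conts)   # forward pass
        loss = crit(preds, y)        # e.g. F.mse_loss
        opt.zero_grad()              # clear gradients from the previous step
        loss.backward()              # backpropagation: chain rule over the graph
        opt.step()                   # weight update (SGD with momentum here)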

Chain Rule and Backpropagation

Overview of chain rule
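For a composition z = f(g(x)), the chain rule says dz/dx = f′(g(x)) · g′(x), and backpropagation is just this rule applied repeatedly from the loss back through every layer. A tiny numeric check with PyTorch autograd (current API, purely illustrative):

import torch

x = torch.tensor(3., requires_grad=True)
y = x ** 2            # inner function: dy/dx = 2x
z = torch.sin(y)      # outer function: dz/dy = cos(y)
z.backward()          # autograd applies the chain rule backwards
print(x.grad)                      # cos(9) * 6
print(torch.cos(x ** 2) * 2 * x)   # same value, computed by hand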

Momentum [1:53:47]
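Momentum replaces the raw gradient step with a step along a running average of recent gradients, so the update keeps moving in directions that are consistently downhill and damps out oscillation. A minimal single-parameter sketch, following PyTorch's SGD convention (names are illustrative):

v = 0.
def sgd_momentum_step(w, g, lr=1e-1, mom=0.9):
    global v
    v = mom * v + g      # running average of recent gradients
    return w - lr * v    # step along the smoothed direction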

Adam [1:59:04]

avg_loss = avg_loss * avg_mom + loss * (1-avg_mom)
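This line from the fastai training loop is an exponentially weighted moving average, used to report a smoothed loss. Adam applies the same EMA idea to the optimization itself: it keeps one EMA of the gradient (like momentum) and one of the squared gradient, and scales the step by their ratio. A minimal single-parameter sketch of the update (names are illustrative):

import math

m, v, t = 0., 0., 0
def adam_step(w, g, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
    global m, v, t
    t += 1
    m = beta1 * m + (1 - beta1) * g        # EMA of gradients (momentum-like)
    v = beta2 * v + (1 - beta2) * g ** 2   # EMA of squared gradients
    m_hat = m / (1 - beta1 ** t)           # bias-correct the zero initialization
    v_hat = v / (1 - beta2 ** t)
    return w - lr * m_hat / (math.sqrt(v_hat) + eps)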

Some intuitions

AdamW [2:11:18]
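With Adam, implementing weight decay as L2 regularization adds wd*w to the gradient before the EMAs, so the decay gets divided by the adaptive denominator and weights with large gradients are barely decayed at all. AdamW decouples the decay, applying it directly to the weight. A sketch of the decoupled step, reusing the state from the Adam sketch above (names are illustrative):

def adamw_step(w, g, lr=1e-3, wd=1e-2, beta1=0.9, beta2=0.999, eps=1e-8):
    global m, v, t
    t += 1
    m = beta1 * m + (1 - beta1) * g        # note: g does NOT include wd*w
    v = beta2 * v + (1 - beta2) * g ** 2
    m_hat = m / (1 - beta1 ** t)
    v_hat = v / (1 - beta2 ** t)
    # decay applied to the weight itself, outside the adaptive rescaling
    return w - lr * (m_hat / (math.sqrt(v_hat) + eps) + wd * w)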

