2. Recommendation Systems

This matters because recommendation models often look simple while hiding strong assumptions about interactions and negatives. Focus on how embeddings represent users and items and what the score actually means.

2.1. User-item interactions

This synthetic example builds positive interactions and a small set of negatives. The point is the modeling pattern, not the dataset scale.

[ ]:
import os

import torch
from torch import nn

# Fix the RNG so embedding init (and therefore the printed losses) is reproducible.
torch.manual_seed(61)

# Smoke-test mode: an external runner sets this env var to keep the run short.
check_mode = os.getenv('PYTORCH_INTRO_CHECK_MODE') == '1'
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# (user, item, label) triples; label 1.0 = observed interaction, 0.0 = negative.
_triples = [
    (0, 0, 1.0), (0, 1, 0.0),
    (1, 1, 1.0), (1, 2, 1.0),
    (2, 2, 0.0), (2, 3, 1.0),
    (3, 0, 0.0), (3, 3, 1.0),
]
user_ids = torch.tensor([u for u, _, _ in _triples], device=device)
item_ids = torch.tensor([i for _, i, _ in _triples], device=device)
labels = torch.tensor([y for _, _, y in _triples], device=device)

# Id spaces are dense 0..max, so the vocab size is just max id + 1.
num_users = int(user_ids.max()) + 1
num_items = int(item_ids.max()) + 1

2.2. Matrix factorization with embeddings

A dot product between user and item embeddings, plus per-user and per-item bias terms, is the simplest useful recommendation model.

[ ]:
class Recommender(nn.Module):
    """Biased matrix factorization: score(u, i) = <p_u, q_i> + b_u + c_i.

    The score is a raw logit; apply a sigmoid to read it as an
    interaction probability.
    """

    def __init__(self, num_users, num_items, embedding_dim=8):
        super().__init__()
        # Latent factor tables for users and items.
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)
        # Scalar popularity/activity offsets, stored as 1-dim embeddings.
        self.user_bias = nn.Embedding(num_users, 1)
        self.item_bias = nn.Embedding(num_items, 1)

    def forward(self, user_ids, item_ids):
        """Return one logit per (user, item) pair; shape (batch,)."""
        interaction = (self.user_embedding(user_ids) * self.item_embedding(item_ids)).sum(dim=1)
        offsets = self.user_bias(user_ids).squeeze(1) + self.item_bias(item_ids).squeeze(1)
        return interaction + offsets

model = Recommender(num_users, num_items).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=0.05)
# Binary labels + raw logits -> BCEWithLogitsLoss (sigmoid fused into the loss).
criterion = nn.BCEWithLogitsLoss()

# Check mode takes only a few steps so smoke tests stay fast.
steps = 5 if check_mode else 100
for step in range(steps):
    optimizer.zero_grad(set_to_none=True)
    loss = criterion(model(user_ids, item_ids), labels)
    loss.backward()
    optimizer.step()
    # Log the first and last steps to show the loss trend without spam.
    if step in (0, steps - 1):
        print(step, round(loss.item(), 4))

# inference_mode disables autograd tracking for the scoring pass.
with torch.inference_mode():
    recommendations = torch.sigmoid(model(user_ids, item_ids))
print(recommendations)
assert recommendations.shape == labels.shape