2. Experiment Tracking
This matters because model training is not a single run; it is a sequence of decisions. If you do not record configs, metrics, and artifacts in a structured way, you cannot tell whether a change actually helped or whether you just got lucky.
This notebook shows a minimal tracking pattern you can implement before reaching for a heavier experiment platform.
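For orientation, here is the shape of the artifact each run below will produce. The field names match the helpers defined in section 2.3, but the values are placeholders, not real results.
[ ]:
# Placeholder record illustrating the per-run JSON artifact (values are made up).
example_record = {
    'config': {'hidden': 8, 'lr': 1e-2, 'epochs': 10},
    'history': [{'loss': 0.69, 'accuracy': 0.5, 'epoch': 1}],  # one entry per epoch
    'best_val_loss': 0.69,
}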
2.1. Setup
[ ]:
from pathlib import Path
import json
import time
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset, random_split
torch.manual_seed(13)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
run_dir = Path('output/experiments')
run_dir.mkdir(parents=True, exist_ok=True)
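One optional extension, sketched here and not used by the helpers below: capture environment details alongside the run artifacts so a run stays interpretable after the code moves on. What to record (git commit, CUDA version, dataset hash) depends on your setup.
[ ]:
# Optional sketch: environment metadata to store next to the run artifacts.
env_info = {'torch_version': torch.__version__, 'device': str(device), 'seed': 13}
env_info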
2.2. Data
[ ]:
# Synthetic binary labels: a score with two linear terms, one interaction
# term (x[:, 2] * x[:, 3]), and Gaussian noise, thresholded at zero.
x = torch.randn(1000, 6)
score = x[:, 0] - 0.7 * x[:, 1] + 0.3 * x[:, 2] * x[:, 3] + 0.4 * torch.randn(1000)
y = (score > 0).float().unsqueeze(1)
dataset = TensorDataset(x, y)
train_ds, val_ds = random_split(dataset, [800, 200], generator=torch.Generator().manual_seed(13))
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=128)
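A quick sanity check, not part of the tracking pattern itself: thresholding the score at zero should give roughly balanced labels, so chance-level accuracy sits near 0.5 and any tracked run should clearly beat it.
[ ]:
# Label balance and split sizes; a positive rate near 0.5 means accuracy 0.5 is chance.
print('positive rate:', round(y.mean().item(), 3))
print('train/val sizes:', len(train_ds), len(val_ds))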
2.3. Tracking Helpers
[ ]:
def evaluate(model, loader, loss_fn):
    # Compute mean loss and accuracy over a loader without tracking gradients.
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(device), yb.to(device)
            logits = model(xb)
            loss = loss_fn(logits, yb)
            total_loss += loss.item() * len(xb)
            preds = (torch.sigmoid(logits) > 0.5).float()
            correct += (preds == yb).sum().item()
            total += yb.numel()
    return {'loss': total_loss / total, 'accuracy': correct / total}
def run_experiment(config):
    # Train a small MLP from a config dict, then persist the config and the
    # per-epoch validation metrics as one JSON artifact per run.
    model = nn.Sequential(nn.Linear(6, config['hidden']), nn.ReLU(), nn.Linear(config['hidden'], 1)).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
    loss_fn = nn.BCEWithLogitsLoss()
    history = []
    for epoch in range(config['epochs']):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            loss = loss_fn(model(xb), yb)
            loss.backward()
            optimizer.step()
        metrics = evaluate(model, val_loader, loss_fn)
        metrics['epoch'] = epoch + 1
        history.append(metrics)
    # Millisecond timestamp keeps run names unique; the hidden size makes them readable.
    run_name = f"run-{int(time.time() * 1000)}-{config['hidden']}h"
    path = run_dir / f'{run_name}.json'
    payload = {'config': config, 'history': history, 'best_val_loss': min(row['loss'] for row in history)}
    path.write_text(json.dumps(payload, indent=2))
    return path, payload
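Before launching tracked runs, it can help to pin down the chance-level floor. A minimal sketch: evaluate an untrained model (the architecture here is arbitrary); its accuracy should sit near 0.5, and any configuration worth recording should clearly beat it.
[ ]:
# Baseline: an untrained model should score near chance, giving runs a floor to beat.
untrained = nn.Sequential(nn.Linear(6, 8), nn.ReLU(), nn.Linear(8, 1)).to(device)
evaluate(untrained, val_loader, nn.BCEWithLogitsLoss())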
2.4. Compare Two Runs
[ ]:
runs = []
for config in [
    {'hidden': 8, 'lr': 1e-2, 'epochs': 10},
    {'hidden': 32, 'lr': 1e-2, 'epochs': 10},
]:
    path, payload = run_experiment(config)
    runs.append((path, payload))
    print(path.name, payload['config'], 'best_val_loss=', round(payload['best_val_loss'], 4))
2.5. Read Back the Saved Metadata
[ ]:
saved = []
for path, _ in runs:
    saved.append(json.loads(path.read_text()))
sorted(
    ({'config': item['config'], 'best_val_loss': round(item['best_val_loss'], 4)} for item in saved),
    key=lambda row: row['best_val_loss'],
)
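From here, a natural next step is to select the winning run programmatically. A minimal sketch, assuming validation loss is the criterion you care about; note that these JSON files track metadata only, so persisting weights (for example with torch.save inside run_experiment) would be a separate addition.
[ ]:
# Pick the best run by validation loss and recover its config for a rerun.
best = min(saved, key=lambda item: item['best_val_loss'])
best['config'], round(best['best_val_loss'], 4)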