from torch.utils.data import DataLoader

train_loader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,      # Shuffle training data
    num_workers=4,     # Parallel data loading
    pin_memory=True    # Faster GPU transfer
)

# Iterate
for batch_data, batch_labels in train_loader:
    # Train on batch
    pass
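# A minimal sketch of a matching evaluation loader; `val_dataset` is an assumed
# held-out dataset object.
val_loader = DataLoader(
    val_dataset,
    batch_size=32,
    shuffle=False,     # Keep evaluation order deterministic
    num_workers=4,
    pin_memory=True
)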
from torch.utils.data import Dataset
from PIL import Image
import os

class ImageDataset(Dataset):
    """Load images from directory."""

    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.images = os.listdir(image_dir)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.images[idx])
        image = Image.open(img_path).convert('RGB')
        # Extract label from filename (e.g., "cat_001.jpg")
        label = 0 if 'cat' in self.images[idx] else 1
        if self.transform:
            image = self.transform(image)
        return image, label
# Transforms
from torchvision import transforms

train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

dataset = ImageDataset('data/images', transform=train_transforms)
loader = DataLoader(dataset, batch_size=32, shuffle=True)
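# A minimal sketch of carving the dataset into train/validation subsets with
# torch.utils.data.random_split; the 80/20 ratio and the seed are illustrative
# assumptions.
from torch.utils.data import random_split
import torch

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_set, val_set = random_split(
    dataset, [train_size, val_size],
    generator=torch.Generator().manual_seed(42)  # Reproducible split
)
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
val_loader = DataLoader(val_set, batch_size=32, shuffle=False)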
from torch.optim.lr_scheduler import CosineAnnealingLR
scheduler = CosineAnnealingLR(optimizer, T_max=100, eta_min=1e-6)
# Smoothly decreases LR following cosine curve
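# A minimal usage sketch for CosineAnnealingLR: step once per epoch and read the
# current LR back with get_last_lr() (train_one_epoch and num_epochs are assumed
# to be defined elsewhere, as in the other snippets here).
for epoch in range(num_epochs):
    train_one_epoch(...)
    scheduler.step()  # Step once per epoch
    print(f"Epoch {epoch + 1}: lr = {scheduler.get_last_lr()[0]:.6f}")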
from torch.optim.lr_scheduler import ReduceLROnPlateau

scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=5,
    verbose=True
)

for epoch in range(num_epochs):
    train_loss = train_one_epoch(...)
    val_loss = validate(...)
    scheduler.step(val_loss)  # Monitor validation loss
from torch.optim.lr_scheduler import OneCycleLR
import torch.optim as optim

optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
scheduler = OneCycleLR(
    optimizer,
    max_lr=0.1,
    steps_per_epoch=len(train_loader),
    epochs=num_epochs
)

for epoch in range(num_epochs):
    for batch in train_loader:
        train_step(...)
        scheduler.step()  # Step per batch, not per epoch!
from torch.optim.lr_scheduler import LambdaLR

def warmup_lambda(epoch):
    if epoch < 5:
        return (epoch + 1) / 5  # Linear warmup over the first 5 epochs
    return 1.0

scheduler = LambdaLR(optimizer, lr_lambda=warmup_lambda)
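# A minimal sketch of chaining linear warmup into cosine decay with SequentialLR
# (available in recent PyTorch releases); the 5-epoch warmup and T_max value are
# illustrative assumptions.
from torch.optim.lr_scheduler import SequentialLR, LinearLR, CosineAnnealingLR

warmup = LinearLR(optimizer, start_factor=0.2, total_iters=5)  # Ramp up over 5 epochs
cosine = CosineAnnealingLR(optimizer, T_max=95)                # Then decay for the rest
scheduler = SequentialLR(optimizer, schedulers=[warmup, cosine], milestones=[5])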
import torch.nn as nn

# Gradient clipping
max_grad_norm = 1.0

for batch in train_loader:
    optimizer.zero_grad()
    loss = train_step(...)
    loss.backward()
    # Clip gradients before the optimizer step
    nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
    optimizer.step()
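# Note: clip_grad_norm_ returns the total gradient norm measured before clipping;
# capturing it inside the loop above gives a cheap signal for exploding gradients
# (the threshold check below is an illustrative sketch).
grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
if grad_norm > max_grad_norm:
    print(f"Gradients clipped (raw norm: {grad_norm:.2f})")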
import torch
import torch.nn as nn

# Weighted loss
class_weights = torch.tensor([1.0, 5.0])  # Weight minority class more
criterion = nn.CrossEntropyLoss(weight=class_weights)
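# A minimal sketch of deriving class weights from label frequencies (inverse
# frequency) instead of hard-coding them; `train_labels` is an assumed 1-D tensor
# of integer labels for the training set.
counts = torch.bincount(train_labels).float()           # Samples per class
class_weights = counts.sum() / (len(counts) * counts)   # Inverse-frequency weights
criterion = nn.CrossEntropyLoss(weight=class_weights)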
# Regularization techniques
model = nn.Sequential(
    nn.Linear(100, 50),
    nn.Dropout(0.5),   # Dropout
    nn.Linear(50, 10)
)

# L2 regularization via optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

# Data augmentation (distinct name so the torchvision `transforms` module isn't shadowed)
augmentation = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2)
])
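# Augmentation is only applied to training data; a minimal sketch of the matching
# evaluation pipeline (deterministic resize + normalize, using the same ImageNet
# statistics as above).
eval_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])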
# Early stopping
best_val_loss = float('inf')
patience = 10
patience_counter = 0

for epoch in range(num_epochs):
    val_loss = validate(...)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        save_model()
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping")
            break
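# A minimal sketch of the same logic wrapped in a reusable helper class (the
# class name and min_delta parameter are illustrative assumptions).
class EarlyStopping:
    def __init__(self, patience=10, min_delta=0.0):
        self.patience = patience
        self.min_delta = min_delta     # Minimum improvement to reset the counter
        self.best = float('inf')
        self.counter = 0

    def step(self, val_loss):
        """Return True when training should stop."""
        if val_loss < self.best - self.min_delta:
            self.best = val_loss
            self.counter = 0
        else:
            self.counter += 1
        return self.counter >= self.patience

stopper = EarlyStopping(patience=10)
for epoch in range(num_epochs):
    val_loss = validate(...)
    if stopper.step(val_loss):
        print("Early stopping")
        break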
# Mixed precision training (PyTorch 1.6+)
from torch.cuda.amp import autocast, GradScaler

scaler = GradScaler()

for data, targets in train_loader:
    optimizer.zero_grad()
    # Forward in mixed precision
    with autocast():
        predictions = model(data)
        loss = criterion(predictions, targets)
    # Backward with scaling
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()
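# In newer PyTorch releases the same utilities also live under torch.amp and take
# an explicit device type; a minimal sketch assuming a recent (2.x) install, with
# the same model/criterion/data names as above.
import torch
from torch.amp import autocast, GradScaler

scaler = GradScaler('cuda')
with autocast(device_type='cuda', dtype=torch.float16):
    predictions = model(data)
    loss = criterion(predictions, targets)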
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.cuda.amp import autocast, GradScaler
import wandb  # For experiment tracking

class Trainer:
    """Production training class."""

    def __init__(self, model, train_loader, val_loader, config):
        self.model = model
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.config = config

        # Device
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = self.model.to(self.device)

        # Loss and optimizer
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.AdamW(
            self.model.parameters(),
            lr=config['lr'],
            weight_decay=config['weight_decay']
        )

        # Scheduler
        self.scheduler = optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer,
            T_max=config['epochs']
        )

        # Mixed precision
        self.scaler = GradScaler()

        # Tracking
        self.best_val_acc = 0.0
    def train_epoch(self):
        """Train one epoch."""
        self.model.train()
        total_loss = 0
        correct = 0
        total = 0

        for data, targets in self.train_loader:
            data = data.to(self.device)
            targets = targets.to(self.device)

            self.optimizer.zero_grad()

            # Mixed precision forward
            with autocast():
                predictions = self.model(data)
                loss = self.criterion(predictions, targets)

            # Backward
            self.scaler.scale(loss).backward()

            # Gradient clipping
            self.scaler.unscale_(self.optimizer)
            nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)

            self.scaler.step(self.optimizer)
            self.scaler.update()

            # Statistics
            total_loss += loss.item()
            _, predicted = predictions.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

        avg_loss = total_loss / len(self.train_loader)
        accuracy = 100. * correct / total
        return avg_loss, accuracy
    @torch.no_grad()
    def validate(self):
        """Validate model."""
        self.model.eval()
        total_loss = 0
        correct = 0
        total = 0

        for data, targets in self.val_loader:
            data = data.to(self.device)
            targets = targets.to(self.device)

            predictions = self.model(data)
            loss = self.criterion(predictions, targets)

            total_loss += loss.item()
            _, predicted = predictions.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

        avg_loss = total_loss / len(self.val_loader)
        accuracy = 100. * correct / total
        return avg_loss, accuracy
    def train(self):
        """Complete training loop."""
        for epoch in range(self.config['epochs']):
            print(f"\nEpoch {epoch + 1}/{self.config['epochs']}")
            print("-" * 60)

            # Train
            train_loss, train_acc = self.train_epoch()

            # Validate
            val_loss, val_acc = self.validate()

            # Scheduler step
            self.scheduler.step()

            # Log
            print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
            print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
            print(f"LR: {self.optimizer.param_groups[0]['lr']:.6f}")

            # Save best model
            if val_acc > self.best_val_acc:
                self.best_val_acc = val_acc
                self.save_checkpoint('best_model.pth')
                print(f"✓ Saved best model (Val Acc: {val_acc:.2f}%)")
    def save_checkpoint(self, path):
        """Save model checkpoint."""
        torch.save({
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'scheduler_state_dict': self.scheduler.state_dict(),
            'best_val_acc': self.best_val_acc,
        }, path)

    def load_checkpoint(self, path):
        """Load model checkpoint."""
        checkpoint = torch.load(path, map_location=self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        self.best_val_acc = checkpoint['best_val_acc']
# Usage
config = {
    'lr': 0.001,
    'weight_decay': 0.01,
    'epochs': 50
}

trainer = Trainer(model, train_loader, val_loader, config)
trainer.train()
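# A minimal sketch of resuming from a saved checkpoint via the class's own
# load_checkpoint method (the file name matches the one written in train()).
trainer = Trainer(model, train_loader, val_loader, config)
trainer.load_checkpoint('best_model.pth')
trainer.train()   # Continues with restored model, optimizer, and scheduler state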