Computer Vision Examples
This section provides comprehensive examples for computer vision tasks using Torchium’s specialized optimizers and loss functions.
Object Detection
YOLO-style Detection with Advanced Losses
import torch
import torch.nn as nn
import torchium
class YOLODetectionModel(nn.Module):
    """YOLO-style detector: shared conv backbone with per-anchor class,
    box, and objectness heads.

    Args:
        num_classes: number of object categories (default 80, COCO-style).
        num_anchors: anchors per location (default 3).
    """

    def __init__(self, num_classes=80, num_anchors=3):
        super().__init__()
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d(1),
            # Fix: collapse (B, 128, 1, 1) to (B, 128) so the Linear heads
            # below receive 2-D input; without this they raise a shape
            # mismatch at runtime.
            nn.Flatten(1),
        )
        self.classifier = nn.Linear(128, num_classes * num_anchors)
        self.regressor = nn.Linear(128, 4 * num_anchors)  # bbox coordinates (4 per anchor)
        self.objectness = nn.Linear(128, num_anchors)  # object confidence per anchor

    def forward(self, x):
        """Return (class_logits, box_preds, objectness_logits) for images `x`."""
        features = self.backbone(x)
        return self.classifier(features), self.regressor(features), self.objectness(features)
# Instantiate the detector with its default COCO-style configuration.
model = YOLODetectionModel()
# Use Ranger optimizer for computer vision
# Ranger combines RAdam with Lookahead; the parameters below are the
# usual published defaults.
optimizer = torchium.optimizers.Ranger(
    model.parameters(),
    lr=1e-3,
    alpha=0.5,  # Lookahead interpolation factor between fast and slow weights
    k=6,  # Lookahead sync period (steps between slow-weight updates)
    N_sma_threshhold=5,  # RAdam variance-rectification threshold (spelling matches the reference implementation)
    betas=(0.9, 0.999),
    eps=1e-8,
    weight_decay=1e-4
)
# Advanced detection loss combining multiple IoU variants
class AdvancedYOLOLoss(nn.Module):
    """Composite detection loss: focal classification, a weighted blend of
    four IoU-family box-regression losses, and BCE objectness.
    """

    # (attribute name, weight) for each IoU-family regression term,
    # applied in this order in forward().
    _IOU_TERMS = (
        ("giou_loss", 0.5),
        ("diou_loss", 0.3),
        ("ciou_loss", 0.2),
        ("eiou_loss", 0.1),
    )

    def __init__(self, num_classes=80, num_anchors=3):
        super().__init__()
        self.num_classes = num_classes
        self.num_anchors = num_anchors
        # Classification: focal loss down-weights easy examples.
        self.cls_loss = torchium.losses.FocalLoss(alpha=0.25, gamma=2.0)
        # Box regression: several IoU variants, blended in forward().
        self.giou_loss = torchium.losses.GIoULoss()
        self.diou_loss = torchium.losses.DIoULoss()
        self.ciou_loss = torchium.losses.CIoULoss()
        self.eiou_loss = torchium.losses.EIoULoss()
        # Objectness: binary cross-entropy on raw logits.
        self.obj_loss = torchium.losses.BCEWithLogitsLoss()

    def forward(self, cls_pred, reg_pred, obj_pred, cls_target, reg_target, obj_target):
        """Return the scalar combined loss for one batch of predictions."""
        total = self.cls_loss(cls_pred, cls_target)
        for attr, weight in self._IOU_TERMS:
            total = total + weight * getattr(self, attr)(reg_pred, reg_target)
        return total + self.obj_loss(obj_pred, obj_target)
# Combined detection criterion with the default term weights.
criterion = AdvancedYOLOLoss()
# Training loop
def train_detection_model(model, optimizer, criterion, dataloader, num_epochs=100):
    """Train a detection model for `num_epochs` passes over `dataloader`.

    Each batch is expected to expose `images`, `cls_targets`, `reg_targets`
    and `obj_targets` attributes. Prints the mean batch loss per epoch.
    """
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0.0
        for batch in dataloader:
            optimizer.zero_grad()
            # Forward pass through the shared backbone, then the three heads.
            # Fix: flatten trailing spatial dims so pooled (B, C, 1, 1)
            # features become (B, C) before the Linear heads; this is a
            # no-op if the backbone already returns 2-D features.
            features = model.backbone(batch.images).flatten(1)
            cls_pred = model.classifier(features)
            reg_pred = model.regressor(features)
            obj_pred = model.objectness(features)
            # Combined classification / regression / objectness loss.
            loss = criterion(cls_pred, reg_pred, obj_pred,
                             batch.cls_targets, batch.reg_targets, batch.obj_targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(dataloader)
        print(f'Epoch {epoch}, Loss: {avg_loss:.4f}')
Image Segmentation
U-Net for Medical Image Segmentation
class UNet(nn.Module):
    """Standard U-Net: a four-level encoder/decoder with skip connections
    and a sigmoid output, suitable for binary segmentation masks.

    Args:
        in_channels: channels of the input image (default 3).
        out_channels: channels of the output mask (default 1).
    """

    def __init__(self, in_channels=3, out_channels=1):
        super().__init__()
        # Encoder path: channel width doubles at every level.
        self.enc1 = self._conv_block(in_channels, 64)
        self.enc2 = self._conv_block(64, 128)
        self.enc3 = self._conv_block(128, 256)
        self.enc4 = self._conv_block(256, 512)
        # Bottleneck at the lowest resolution.
        self.bottleneck = self._conv_block(512, 1024)
        # Decoder path: each block consumes the upsampled features
        # concatenated with the matching encoder skip, hence the summed
        # input channel counts.
        self.dec4 = self._conv_block(1024 + 512, 512)
        self.dec3 = self._conv_block(512 + 256, 256)
        self.dec2 = self._conv_block(256 + 128, 128)
        self.dec1 = self._conv_block(128 + 64, 64)
        # 1x1 projection to the requested number of mask channels.
        self.final = nn.Conv2d(64, out_channels, 1)
        # Shared down/up resamplers.
        self.pool = nn.MaxPool2d(2)
        self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

    def _conv_block(self, in_channels, out_channels):
        """Two 3x3 conv + BN + ReLU stages, preserving spatial size."""
        stages = [
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]
        return nn.Sequential(*stages)

    def forward(self, x):
        """Return a sigmoid mask with the same spatial size as `x`."""
        # Encoder: keep each level's output for its skip connection.
        skip1 = self.enc1(x)
        skip2 = self.enc2(self.pool(skip1))
        skip3 = self.enc3(self.pool(skip2))
        skip4 = self.enc4(self.pool(skip3))
        mid = self.bottleneck(self.pool(skip4))
        # Decoder: upsample, concatenate the matching skip, convolve.
        out = self.dec4(torch.cat([self.up(mid), skip4], dim=1))
        out = self.dec3(torch.cat([self.up(out), skip3], dim=1))
        out = self.dec2(torch.cat([self.up(out), skip2], dim=1))
        out = self.dec1(torch.cat([self.up(out), skip1], dim=1))
        return torch.sigmoid(self.final(out))
model = UNet(in_channels=1, out_channels=1) # Grayscale medical images
# Use SAM for better generalization in medical imaging
# SAM (Sharpness-Aware Minimization) needs two forward/backward passes
# per optimization step, so the training loop must call first_step and
# second_step rather than a single step().
optimizer = torchium.optimizers.SAM(
    model.parameters(),
    lr=1e-3,
    rho=0.05,  # neighborhood radius for the sharpness perturbation
    adaptive=True  # per-parameter scaling of the perturbation (ASAM-style)
)
# Medical segmentation loss combination
class MedicalSegmentationLoss(nn.Module):
    """Weighted blend of five segmentation losses for medical imaging:
    Dice, Tversky, focal, Lovasz and boundary terms.
    """

    def __init__(self):
        super().__init__()
        self.dice = torchium.losses.DiceLoss(smooth=1e-5)
        # beta > alpha penalizes false negatives more heavily than false
        # positives.
        self.tversky = torchium.losses.TverskyLoss(alpha=0.3, beta=0.7)
        self.focal = torchium.losses.FocalLoss(alpha=0.25, gamma=2.0)
        self.lovasz = torchium.losses.LovaszLoss()
        self.boundary = torchium.losses.BoundaryLoss()

    def forward(self, pred, target):
        """Return the weighted sum of all five component losses."""
        # Medical imaging specific weighting, applied in this order.
        weighted_terms = (
            (0.3, self.dice),
            (0.25, self.tversky),
            (0.2, self.focal),
            (0.15, self.lovasz),
            (0.1, self.boundary),
        )
        total = 0
        for weight, loss_fn in weighted_terms:
            total = total + weight * loss_fn(pred, target)
        return total
# Combined medical-segmentation criterion.
criterion = MedicalSegmentationLoss()
# Training loop with SAM
def train_segmentation_model(model, optimizer, criterion, dataloader, num_epochs=100):
    """Train `model` with a SAM-style optimizer for `num_epochs` epochs.

    SAM takes two forward/backward passes per batch: the first computes
    gradients at the current weights, `first_step` perturbs the weights,
    the second pass computes gradients at the perturbed point, and
    `second_step` applies the actual update. Batches must expose `images`
    and `masks` attributes. Prints the mean second-pass loss per epoch.
    """
    model.train()
    for epoch in range(num_epochs):
        running = 0.0
        for batch in dataloader:
            # Pass 1: gradients at the current weights.
            criterion(model(batch.images), batch.masks).backward()
            optimizer.first_step(zero_grad=True)
            # Pass 2: gradients at the perturbed weights.
            second_loss = criterion(model(batch.images), batch.masks)
            second_loss.backward()
            optimizer.second_step(zero_grad=True)
            running += second_loss.item()
        print(f'Epoch {epoch}, Loss: {running / len(dataloader):.4f}')
Super Resolution
ESRGAN-style Super Resolution
class ESRGANGenerator(nn.Module):
    """SRGAN/ESRGAN-style generator: a deep conv trunk with one global
    skip connection, followed by a pixel-shuffle upsampler producing a
    `scale_factor`-times larger RGB image with tanh-bounded values.
    """

    def __init__(self, scale_factor=4):
        super().__init__()
        self.scale_factor = scale_factor
        # Large 9x9 receptive field on the way in.
        self.conv1 = nn.Conv2d(3, 64, 9, padding=4)
        # 16 identical conv blocks at 64 channels, chained directly
        # (the only skip connection is the global one in forward()).
        blocks = [self._residual_block(64) for _ in range(16)]
        self.res_blocks = nn.Sequential(*blocks)
        self.conv2 = nn.Conv2d(64, 64, 3, padding=1)
        # Emit scale_factor^2 sub-pixel channels per RGB channel ...
        self.conv3 = nn.Conv2d(64, 3 * scale_factor**2, 9, padding=4)
        # ... which PixelShuffle rearranges into spatial resolution.
        self.pixel_shuffle = nn.PixelShuffle(scale_factor)

    def _residual_block(self, channels):
        """Conv-BN-ReLU-Conv-BN at constant width."""
        return nn.Sequential(
            nn.Conv2d(channels, channels, 3, padding=1),
            nn.BatchNorm2d(channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels, channels, 3, padding=1),
            nn.BatchNorm2d(channels),
        )

    def forward(self, x):
        """Return the upscaled image, values in (-1, 1) via tanh."""
        head = self.conv1(x)
        # Global skip over the whole trunk.
        trunk = self.conv2(self.res_blocks(head)) + head
        upsampled = self.pixel_shuffle(self.conv3(trunk))
        return torch.tanh(upsampled)
class ESRGANDiscriminator(nn.Module):
    """VGG-style discriminator: alternating stride-1 / stride-2 conv
    stages growing 64 -> 512 channels, global average pooling, then two
    1x1 convs producing one real/fake logit per image, shape (B, 1, 1, 1).
    """

    def __init__(self):
        super().__init__()
        layers = [nn.Conv2d(3, 64, 3, padding=1), nn.LeakyReLU(0.2, inplace=True)]
        # Conv+BN+LeakyReLU stages; a stage whose output width equals the
        # current width downsamples with stride 2, otherwise it widens at
        # stride 1. This reproduces the classic SRGAN layout exactly.
        width = 64
        for out_ch in (64, 128, 128, 256, 256, 512, 512):
            stride = 2 if out_ch == width else 1
            layers += [
                nn.Conv2d(width, out_ch, 3, stride=stride, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.LeakyReLU(0.2, inplace=True),
            ]
            width = out_ch
        # Head: pool to 1x1, then a 1x1-conv MLP down to a single logit.
        layers += [
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(512, 1024, 1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(1024, 1, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return a real/fake logit of shape (B, 1, 1, 1)."""
        return self.net(x)
# Build the GAN pair: 4x upscaling generator plus its discriminator.
generator = ESRGANGenerator(scale_factor=4)
discriminator = ESRGANDiscriminator()
# Use different optimizers for G and D
# (separate instances so the two updates never share optimizer state).
g_optimizer = torchium.optimizers.Adam(
    generator.parameters(),
    lr=1e-4,
    betas=(0.9, 0.999)
)
d_optimizer = torchium.optimizers.Adam(
    discriminator.parameters(),
    lr=1e-4,
    betas=(0.9, 0.999)
)
# Perceptual super resolution loss
class PerceptualSRLoss(nn.Module):
    """Perceptual super-resolution criterion combining pixel (MSE),
    perceptual, SSIM, VGG and adversarial terms.

    `forward` serves both networks: with loss_type='generator' it returns
    the weighted sum of all terms; any other value returns only the GAN
    term for the discriminator (the image/feature arguments are unused
    in that branch).
    """

    def __init__(self):
        super().__init__()
        self.mse = torchium.losses.MSELoss()
        self.perceptual = torchium.losses.PerceptualLoss()
        self.ssim = torchium.losses.SSIMLoss()
        self.vgg = torchium.losses.VGGLoss()
        self.gan_loss = torchium.losses.GANLoss()

    def forward(self, sr_pred, hr_target, sr_features, hr_features,
                fake_pred, real_pred, loss_type='generator'):
        if loss_type != 'generator':  # discriminator: adversarial term only
            return self.gan_loss(fake_pred, real_pred)
        # Generator: small weights keep the adversarial signal from
        # overwhelming the reconstruction terms.
        terms = [
            0.01 * self.mse(sr_pred, hr_target),
            0.006 * self.perceptual(sr_features, hr_features),
            0.01 * self.ssim(sr_pred, hr_target),
            0.01 * self.vgg(sr_pred, hr_target),
            0.005 * self.gan_loss(fake_pred, real_pred),
        ]
        return sum(terms)
# Shared criterion serving both generator and discriminator updates.
criterion = PerceptualSRLoss()
# Training loop for super resolution
def train_sr_model(generator, discriminator, g_optimizer, d_optimizer,
                   criterion, dataloader, num_epochs=100):
    """Alternating GAN training for super resolution.

    Per batch: (1) update the discriminator on real HR images and detached
    fake images, (2) update the generator with the combined perceptual /
    adversarial criterion. Batches must expose `lr_images` and `hr_images`.

    NOTE(review): `extract_features` is not defined in this file -- it is
    presumably a fixed feature extractor (e.g. a frozen VGG); confirm it
    is in scope before running this example.
    """
    generator.train()
    discriminator.train()
    for epoch in range(num_epochs):
        g_loss_total = 0
        d_loss_total = 0
        for batch in dataloader:
            lr_images = batch.lr_images
            hr_images = batch.hr_images
            # Train Discriminator
            d_optimizer.zero_grad()
            # Real images
            real_pred = discriminator(hr_images)
            real_target = torch.ones_like(real_pred)
            # Fake images -- detached so the D step does not backprop
            # through the generator.
            fake_images = generator(lr_images)
            fake_pred = discriminator(fake_images.detach())
            fake_target = torch.zeros_like(fake_pred)
            # Discriminator branch of the shared criterion uses only the
            # adversarial term, so the image/feature slots are None.
            d_loss = criterion(None, None, None, None, fake_pred, real_pred, 'discriminator')
            d_loss.backward()
            d_optimizer.step()
            # Train Generator
            g_optimizer.zero_grad()
            # Fresh (non-detached) pass so gradients reach the generator.
            fake_images = generator(lr_images)
            fake_pred = discriminator(fake_images)
            real_target = torch.ones_like(fake_pred)
            # Extract features for perceptual loss
            sr_features = extract_features(fake_images)
            hr_features = extract_features(hr_images)
            g_loss = criterion(fake_images, hr_images, sr_features, hr_features,
                               fake_pred, real_target, 'generator')
            g_loss.backward()
            g_optimizer.step()
            g_loss_total += g_loss.item()
            d_loss_total += d_loss.item()
        print(f'Epoch {epoch}, G Loss: {g_loss_total/len(dataloader):.4f}, '
              f'D Loss: {d_loss_total/len(dataloader):.4f}')
Style Transfer
Neural Style Transfer with Advanced Losses
import torchvision.models as models
class StyleTransferModel(nn.Module):
    """VGG-19 feature extractor for neural style transfer: returns the
    activation of every convolutional layer up to conv4_4.
    """

    def __init__(self):
        super().__init__()
        # Pre-trained VGG-19 trunk, truncated after layer 35 (conv4_4).
        vgg = models.vgg19(pretrained=True).features
        self.features = nn.ModuleList(vgg[:36])  # Up to conv4_4

    def forward(self, x):
        """Run `x` through the trunk, collecting each Conv2d output."""
        conv_outputs = []
        for layer in self.features:
            x = layer(x)
            if isinstance(layer, nn.Conv2d):
                conv_outputs.append(x)
        return conv_outputs
model = StyleTransferModel()
# Use Adam with custom parameters for style transfer
# NOTE(review): this optimizer is built over the VGG extractor's weights.
# Classic neural style transfer optimizes the pixels of the generated
# image rather than network weights -- confirm the intended behavior.
optimizer = torchium.optimizers.Adam(
    model.parameters(),
    lr=1e-3,
    betas=(0.9, 0.999),
    eps=1e-8
)
# Neural style transfer loss
class NeuralStyleLoss(nn.Module):
    """Classic neural-style objective: content, style and total-variation
    terms over feature lists from a VGG-style extractor.
    """

    # (content, style, total-variation) weights; style dominates by design.
    WEIGHTS = (1.0, 100.0, 0.1)

    def __init__(self):
        super().__init__()
        self.content_loss = torchium.losses.ContentLoss()
        self.style_loss = torchium.losses.StyleLoss()
        self.tv_loss = torchium.losses.TotalVariationLoss()

    def forward(self, generated_features, content_features, style_features):
        """Return the weighted sum of content, style and TV losses."""
        w_content, w_style, w_tv = self.WEIGHTS
        content_term = self.content_loss(generated_features, content_features)
        style_term = self.style_loss(generated_features, style_features)
        smooth_term = self.tv_loss(generated_features)
        return w_content * content_term + w_style * style_term + w_tv * smooth_term
# Combined style-transfer criterion.
criterion = NeuralStyleLoss()
# Training loop for style transfer
def train_style_transfer(model, optimizer, criterion, content_image, style_image, num_epochs=1000):
    """Optimize an image to match `content_image`'s content and
    `style_image`'s style, using `model` as a feature extractor.

    Returns the optimized image tensor (fix: the original never returned
    it, so the result of the optimization was discarded).

    NOTE(review): if `optimizer` also holds the extractor's weights, they
    will be updated too; freeze the extractor for classic style transfer.
    """
    model.train()
    # The generated image is the variable being optimized.
    generated_image = content_image.clone().requires_grad_(True)
    # Fix: register the image with the optimizer -- without this,
    # optimizer.step() never touches it and the loop is a no-op on the
    # image.
    optimizer.add_param_group({'params': [generated_image]})
    # Fix: the content and style images never change, so extract their
    # target features once, outside the loop and without autograd.
    with torch.no_grad():
        content_features = [f.detach() for f in model(content_image)]
        style_features = [f.detach() for f in model(style_image)]
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        generated_features = model(generated_image)
        loss = criterion(generated_features, content_features, style_features)
        loss.backward()
        optimizer.step()
        if epoch % 100 == 0:
            print(f'Epoch {epoch}, Loss: {loss.item():.4f}')
    return generated_image
Multi-Task Computer Vision
Multi-Task Learning for Vision
class MultiTaskVisionModel(nn.Module):
    """Shared conv backbone with three task heads: 10-way classification,
    scalar regression, and 21-class segmentation logits.
    """

    def __init__(self):
        super().__init__()
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d(1),
            # Fix: flatten (B, 128, 1, 1) to (B, 128) so the Linear heads
            # below can consume the pooled features; without this they
            # raise a shape mismatch at runtime.
            nn.Flatten(1),
        )
        self.classifier = nn.Linear(128, 10)  # Classification
        self.regressor = nn.Linear(128, 1)  # Regression
        self.segmenter = nn.Linear(128, 21)  # Segmentation

    def forward(self, x):
        """Return (classification, regression, segmentation) outputs."""
        features = self.backbone(x)
        return self.classifier(features), self.regressor(features), self.segmenter(features)
model = MultiTaskVisionModel()
# Use PCGrad for gradient surgery
# PCGrad resolves conflicts between task gradients by projecting each
# onto the normal plane of the others before updating.
optimizer = torchium.optimizers.PCGrad(
    model.parameters(),
    lr=1e-3
)
# Multi-task loss with uncertainty weighting
class MultiTaskVisionLoss(nn.Module):
    """Three per-task losses (cross-entropy, MSE, Dice) combined via
    uncertainty weighting across the tasks.
    """

    def __init__(self):
        super().__init__()
        self.uncertainty_loss = torchium.losses.UncertaintyWeightingLoss(num_tasks=3)
        self.cls_loss = torchium.losses.CrossEntropyLoss()
        self.reg_loss = torchium.losses.MSELoss()
        self.seg_loss = torchium.losses.DiceLoss()

    def forward(self, cls_pred, reg_pred, seg_pred, cls_target, reg_target, seg_target):
        """Return the uncertainty-weighted combination of the task losses."""
        task_losses = [
            self.cls_loss(cls_pred, cls_target),
            self.reg_loss(reg_pred, reg_target),
            self.seg_loss(seg_pred, seg_target),
        ]
        return self.uncertainty_loss(task_losses)
# Multi-task criterion instance.
criterion = MultiTaskVisionLoss()
# Training loop for multi-task learning
def train_multitask_model(model, optimizer, criterion, dataloader, num_epochs=100):
    """Train the multi-task model for `num_epochs` epochs.

    Batches must expose `images`, `cls_targets`, `reg_targets` and
    `seg_targets`. Prints the mean combined loss per epoch.
    """
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0.0
        for batch in dataloader:
            optimizer.zero_grad()
            # Fix: flatten pooled (B, C, 1, 1) features to (B, C) before
            # the Linear heads; a no-op if the backbone already returns
            # 2-D features.
            features = model.backbone(batch.images).flatten(1)
            cls_pred = model.classifier(features)
            reg_pred = model.regressor(features)
            seg_pred = model.segmenter(features)
            loss = criterion(cls_pred, reg_pred, seg_pred,
                             batch.cls_targets, batch.reg_targets, batch.seg_targets)
            # NOTE(review): PCGrad implementations usually consume the list
            # of per-task losses (e.g. via pc_backward); a single combined
            # backward() only exercises gradient surgery if the optimizer
            # supports plain backward()/step() -- confirm against the
            # torchium PCGrad API.
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(dataloader)
        print(f'Epoch {epoch}, Loss: {avg_loss:.4f}')
These examples demonstrate the power of Torchium’s specialized optimizers and loss functions for various computer vision tasks. Each example shows how to combine different components effectively for optimal performance.