Domain-Specific Usage Guide
This guide covers specialized usage patterns for different domains, showcasing how to leverage Torchium’s domain-specific optimizers and loss functions effectively.
Computer Vision
Object Detection
Advanced Detection Pipeline
import torch
import torch.nn as nn
import torchium
class DetectionModel(nn.Module):
    """Toy object-detection network: small conv backbone plus classification
    and box-regression heads (illustrative — no forward pass is defined)."""

    def __init__(self, num_classes=80):
        super().__init__()
        backbone_layers = [
            nn.Conv2d(3, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
        ]
        self.backbone = nn.Sequential(*backbone_layers)
        self.classifier = nn.Linear(128, num_classes)  # per-class scores
        self.regressor = nn.Linear(128, 4)  # bbox coordinates
model = DetectionModel()
# Use Ranger optimizer for computer vision
# Ranger combines RAdam with Lookahead (`k` = sync period, `alpha` = slow-weight
# interpolation). NOTE(review): "N_sma_threshhold" (sic) matches the reference
# Ranger implementation's spelling — confirm torchium uses the same name.
optimizer = torchium.optimizers.Ranger(
model.parameters(),
lr=1e-3,
alpha=0.5,
k=6,
N_sma_threshhold=5,
betas=(0.9, 0.999),
eps=1e-8,
weight_decay=1e-4
)
# Advanced detection loss combining multiple IoU variants
class AdvancedDetectionLoss(nn.Module):
    """Detection criterion: focal classification loss plus a fixed-weight
    blend of GIoU, DIoU and CIoU box-regression losses."""

    def __init__(self):
        super().__init__()
        self.cls_loss = torchium.losses.FocalLoss(alpha=0.25, gamma=2.0)
        self.giou_loss = torchium.losses.GIoULoss()
        self.diou_loss = torchium.losses.DIoULoss()
        self.ciou_loss = torchium.losses.CIoULoss()

    def forward(self, cls_pred, reg_pred, cls_target, reg_target):
        """Return the combined scalar loss for one batch."""
        cls_term = self.cls_loss(cls_pred, cls_target)
        giou_term = self.giou_loss(reg_pred, reg_target)
        diou_term = self.diou_loss(reg_pred, reg_target)
        ciou_term = self.ciou_loss(reg_pred, reg_target)
        # Weighted combination — weights are illustrative.
        return cls_term + 0.5 * giou_term + 0.3 * diou_term + 0.2 * ciou_term


criterion = AdvancedDetectionLoss()
Image Segmentation
Medical Image Segmentation
class UNet(nn.Module):
    """Minimal encoder/decoder segmentation network (illustrative — no skip
    connections or forward pass are defined)."""

    def __init__(self, in_channels=3, out_channels=1):
        super().__init__()
        enc = [
            nn.Conv2d(in_channels, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        ]
        dec = [
            nn.Conv2d(64, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, out_channels, 1),  # 1x1 conv to the output channels
        ]
        self.encoder = nn.Sequential(*enc)
        self.decoder = nn.Sequential(*dec)


model = UNet()
# Use SAM for better generalization in medical imaging
# NOTE(review): SAM normally needs a closure/two-step update in the training
# loop (ascent step, then descent step) — confirm torchium's SAM step() API.
optimizer = torchium.optimizers.SAM(
model.parameters(),
lr=1e-3,
rho=0.05,
adaptive=True
)
# Medical segmentation loss combination
class MedicalSegmentationLoss(nn.Module):
    """Composite segmentation criterion blending Dice, Tversky, focal and
    Lovasz losses with fixed weights."""

    def __init__(self):
        super().__init__()
        self.dice = torchium.losses.DiceLoss(smooth=1e-5)
        self.tversky = torchium.losses.TverskyLoss(alpha=0.3, beta=0.7)
        self.focal = torchium.losses.FocalLoss(alpha=0.25, gamma=2.0)
        self.lovasz = torchium.losses.LovaszLoss()

    def forward(self, pred, target):
        """Return the weighted sum of the four component losses."""
        # Medical imaging specific weighting.
        weighted_terms = (
            (0.4, self.dice),
            (0.3, self.tversky),
            (0.2, self.focal),
            (0.1, self.lovasz),
        )
        return sum(w * fn(pred, target) for w, fn in weighted_terms)


criterion = MedicalSegmentationLoss()
Super Resolution
Perceptual Super Resolution
class SRResNet(nn.Module):
    """SRResNet-style super-resolution backbone.

    Bug fix vs. the original listing: ``nn.Sequential(*[...] for _ in range(16))``
    is a SyntaxError in Python (starred unpacking cannot be applied to a
    generator expression), and even the closest legal spelling would hand
    *lists* rather than modules to ``nn.Sequential``. The residual stack is
    now built by repeating the five-layer pattern 16 times into one flat
    list of modules, preserving the intended architecture.
    """

    def __init__(self, scale_factor=4):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, 9, padding=4)
        res_layers = []
        for _ in range(16):  # 16 residual-style conv blocks
            res_layers += [
                nn.Conv2d(64, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.Conv2d(64, 64, 3, padding=1),
                nn.BatchNorm2d(64),
            ]
        self.res_blocks = nn.Sequential(*res_layers)
        self.conv2 = nn.Conv2d(64, 64, 3, padding=1)
        # Produce scale_factor**2 sub-pixel channels per RGB channel, then
        # rearrange them spatially with PixelShuffle for the upscaling step.
        self.conv3 = nn.Conv2d(64, 3 * scale_factor**2, 9, padding=4)
        self.pixel_shuffle = nn.PixelShuffle(scale_factor)


model = SRResNet()
# Use Lookahead for stable super resolution training
# NOTE(review): Lookahead is usually a wrapper around an inner optimizer
# (e.g. Adam); confirm torchium's Lookahead accepts raw parameters with lr.
optimizer = torchium.optimizers.Lookahead(
model.parameters(),
lr=1e-4,
alpha=0.5,
k=5
)
# Perceptual super resolution loss
class PerceptualSRLoss(nn.Module):
    """Perceptual SR criterion: pixel MSE plus perceptual, SSIM and VGG
    feature losses, combined with fixed weights."""

    def __init__(self):
        super().__init__()
        self.mse = torchium.losses.MSELoss()
        self.perceptual = torchium.losses.PerceptualLoss()
        self.ssim = torchium.losses.SSIMLoss()
        self.vgg = torchium.losses.VGGLoss()

    def forward(self, pred, target):
        """Return the weighted sum of the four component losses."""
        weighted_terms = (
            (0.1, self.mse),
            (0.6, self.perceptual),
            (0.2, self.ssim),
            (0.1, self.vgg),
        )
        return sum(w * fn(pred, target) for w, fn in weighted_terms)


criterion = PerceptualSRLoss()
Style Transfer
Neural Style Transfer
class StyleTransferModel(nn.Module):
    """Feature extractor for neural style transfer built from VGG-19 layers.

    NOTE(review): ``pretrained=True`` is deprecated in recent torchvision in
    favour of the ``weights=`` argument — confirm the installed version.
    """

    def __init__(self):
        super().__init__()
        # Use pre-trained VGG as feature extractor.
        import torchvision.models as models

        vgg = models.vgg19(pretrained=True).features
        self.features = nn.ModuleList(vgg[:36])  # Up to conv4_4


model = StyleTransferModel()
# Use Adam with custom parameters for style transfer
# (these match Adam's standard defaults, written out explicitly)
optimizer = torchium.optimizers.Adam(
model.parameters(),
lr=1e-3,
betas=(0.9, 0.999),
eps=1e-8
)
# Neural style transfer loss
class NeuralStyleLoss(nn.Module):
    """Classic style-transfer criterion: content + style + total variation,
    with the style term weighted far above content."""

    def __init__(self):
        super().__init__()
        self.content_loss = torchium.losses.ContentLoss()
        self.style_loss = torchium.losses.StyleLoss()
        self.tv_loss = torchium.losses.TotalVariationLoss()

    def forward(self, generated, content, style):
        """Return the weighted total for one generated image."""
        content_term = self.content_loss(generated, content)
        style_term = self.style_loss(generated, style)
        smooth_term = self.tv_loss(generated)  # smoothness regulariser
        return 1.0 * content_term + 100.0 * style_term + 0.1 * smooth_term


criterion = NeuralStyleLoss()
Natural Language Processing
Transformer Training
Large Language Model Training
class TransformerModel(nn.Module):
    """Encoder-only language model: token embedding, learned positional
    table (up to 1000 positions), Transformer encoder, vocab projection.
    Illustrative — no forward pass is defined."""

    def __init__(self, vocab_size=50000, d_model=512, nhead=8, num_layers=6):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        # Learned positional encoding; assumes sequences of at most 1000 tokens.
        self.pos_encoding = nn.Parameter(torch.randn(1000, d_model))
        encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)
        self.classifier = nn.Linear(d_model, vocab_size)
model = TransformerModel()
# Use LAMB for large batch training
# NOTE(review): clamp_value presumably caps LAMB's layer-wise trust ratio —
# confirm against torchium's LAMB signature.
optimizer = torchium.optimizers.LAMB(
model.parameters(),
lr=1e-3,
betas=(0.9, 0.999),
eps=1e-6,
weight_decay=0.01,
clamp_value=10.0
)
# Advanced NLP loss with label smoothing
# num_classes must equal the model's vocab_size (50000 above).
criterion = torchium.losses.LabelSmoothingLoss(
num_classes=50000,
smoothing=0.1
)
Sequence-to-Sequence Models
class Seq2SeqModel(nn.Module):
    """Transformer encoder/decoder pair with an output projection.

    NOTE(review): ``input_vocab_size`` is accepted but unused here — no
    source embedding is defined in this illustrative listing.
    """

    def __init__(self, input_vocab_size, output_vocab_size, d_model=512):
        super().__init__()
        enc_layer = nn.TransformerEncoderLayer(d_model, 8, batch_first=True)
        self.encoder = nn.TransformerEncoder(enc_layer, 6)
        dec_layer = nn.TransformerDecoderLayer(d_model, 8, batch_first=True)
        self.decoder = nn.TransformerDecoder(dec_layer, 6)
        self.output_projection = nn.Linear(d_model, output_vocab_size)
model = Seq2SeqModel(input_vocab_size=30000, output_vocab_size=30000)
# Use NovoGrad for NLP tasks
# NOTE(review): grad_averaging presumably smooths the first moment as in the
# reference NovoGrad — confirm torchium's parameter semantics.
optimizer = torchium.optimizers.NovoGrad(
model.parameters(),
lr=1e-3,
betas=(0.9, 0.999),
eps=1e-8,
weight_decay=0.01,
grad_averaging=True
)
# Combined loss for seq2seq
class Seq2SeqLoss(nn.Module):
    """Blend of plain cross-entropy (70%) and label-smoothed loss (30%)."""

    def __init__(self):
        super().__init__()
        self.ce_loss = torchium.losses.CrossEntropyLoss()
        # NOTE(review): num_classes must match the decoder vocabulary size.
        self.label_smoothing = torchium.losses.LabelSmoothingLoss(
            num_classes=30000, smoothing=0.1
        )

    def forward(self, pred, target):
        """Return 0.7 * CE + 0.3 * label-smoothed loss."""
        hard_term = self.ce_loss(pred, target)
        smooth_term = self.label_smoothing(pred, target)
        return 0.7 * hard_term + 0.3 * smooth_term


criterion = Seq2SeqLoss()
Word Embeddings
Word2Vec Training
class Word2VecModel(nn.Module):
    """Word2vec-style embedding pair: separate target and context tables
    over the same vocabulary."""

    def __init__(self, vocab_size, embedding_dim=300):
        super().__init__()
        self.target_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.context_embeddings = nn.Embedding(vocab_size, embedding_dim)
model = Word2VecModel(vocab_size=100000)
# Use SGD for word embeddings
# lr=0.025 is the classic word2vec starting learning rate.
optimizer = torchium.optimizers.SGD(
model.parameters(),
lr=0.025,
momentum=0.9
)
# Word2Vec specific loss
# NOTE(review): vocab_size/embedding_dim here should stay in sync with the
# Word2VecModel settings above; negative_samples configures negative sampling.
criterion = torchium.losses.Word2VecLoss(
vocab_size=100000,
embedding_dim=300,
negative_samples=5
)
Generative Models
GAN Training
Advanced GAN Training
class Generator(nn.Module):
    """MLP generator: latent vector -> flattened sample squashed by Tanh."""

    def __init__(self, latent_dim=100, output_dim=784):
        super().__init__()
        widths = [latent_dim, 256, 512, 1024]
        layers = []
        # Hidden stack: Linear -> BatchNorm1d -> ReLU at each width step.
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            layers += [nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.ReLU()]
        layers += [nn.Linear(1024, output_dim), nn.Tanh()]
        self.net = nn.Sequential(*layers)
class Discriminator(nn.Module):
    """MLP discriminator: flattened sample -> probability via Sigmoid."""

    def __init__(self, input_dim=784):
        super().__init__()
        widths = [input_dim, 1024, 512, 256]
        layers = []
        # Hidden stack: Linear -> LeakyReLU(0.2) -> Dropout(0.3) per step.
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            layers += [nn.Linear(n_in, n_out), nn.LeakyReLU(0.2), nn.Dropout(0.3)]
        layers += [nn.Linear(256, 1), nn.Sigmoid()]
        self.net = nn.Sequential(*layers)
generator = Generator()
discriminator = Discriminator()
# Different optimizers for G and D
# betas=(0.5, 0.999) is the common GAN setting (lower beta1 for stability).
g_optimizer = torchium.optimizers.Adam(
generator.parameters(),
lr=2e-4,
betas=(0.5, 0.999)
)
d_optimizer = torchium.optimizers.Adam(
discriminator.parameters(),
lr=2e-4,
betas=(0.5, 0.999)
)
# Advanced GAN loss
class AdvancedGANLoss(nn.Module):
    """Dispatches between vanilla GAN, Wasserstein, and hinge losses.

    NOTE(review): an unrecognised loss_type returns None, matching the
    original listing's fall-through behaviour.
    """

    def __init__(self):
        super().__init__()
        self.gan_loss = torchium.losses.GANLoss()
        self.wasserstein_loss = torchium.losses.WassersteinLoss()
        self.hinge_loss = torchium.losses.HingeGANLoss()

    def forward(self, fake_pred, real_pred, loss_type='gan'):
        """Compute the adversarial loss selected by loss_type."""
        dispatch = {
            'gan': self.gan_loss,
            'wasserstein': self.wasserstein_loss,
            'hinge': self.hinge_loss,
        }
        loss_fn = dispatch.get(loss_type)
        return loss_fn(fake_pred, real_pred) if loss_fn is not None else None


criterion = AdvancedGANLoss()
VAE Training
Beta-VAE for Disentangled Representations
class BetaVAE(nn.Module):
    """Beta-VAE: MLP encoder to Gaussian posterior heads (mu, log_var) and an
    MLP decoder back to the input space; ``beta`` is stored for use by the
    associated Beta-VAE objective."""

    def __init__(self, input_dim=784, latent_dim=20, beta=1.0):
        super().__init__()
        self.beta = beta  # KL weight consumed by the Beta-VAE loss
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 400),
            nn.ReLU(),
            nn.Linear(400, 400),
            nn.ReLU(),
        )
        # Two heads produce the posterior parameters.
        self.mu = nn.Linear(400, latent_dim)
        self.log_var = nn.Linear(400, latent_dim)
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 400),
            nn.ReLU(),
            nn.Linear(400, 400),
            nn.ReLU(),
            nn.Linear(400, input_dim),
            nn.Sigmoid(),  # reconstructions constrained to [0, 1]
        )


model = BetaVAE(beta=4.0)
# Use AdaBelief for stable VAE training
optimizer = torchium.optimizers.AdaBelief(
model.parameters(),
lr=1e-3,
betas=(0.9, 0.999),
eps=1e-8,
weight_decay=1e-4
)
# Beta-VAE loss
# NOTE(review): beta here should match the model's beta (4.0) — keep in sync.
criterion = torchium.losses.BetaVAELoss(beta=4.0)
Diffusion Models
DDPM Training
class DiffusionModel(nn.Module):
    """Tiny denoising MLP for DDPM-style training; the input is the flattened
    sample concatenated with a scalar timestep (hence input_dim + 1)."""

    def __init__(self, input_dim=784, hidden_dim=512):
        super().__init__()
        layers = [
            nn.Linear(input_dim + 1, hidden_dim),  # +1 for timestep
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
        ]
        self.net = nn.Sequential(*layers)


model = DiffusionModel()
# Use AdamW for diffusion models
# (AdamW applies decoupled weight decay)
optimizer = torchium.optimizers.AdamW(
model.parameters(),
lr=1e-4,
betas=(0.9, 0.999),
eps=1e-8,
weight_decay=1e-4
)
# DDPM loss
criterion = torchium.losses.DDPMLoss()
Metric Learning
Face Recognition
ArcFace for Face Recognition
class FaceRecognitionModel(nn.Module):
    """Conv backbone with global pooling, an embedding head and a classifier
    head (illustrative — no forward pass is defined)."""

    def __init__(self, embedding_dim=512, num_classes=1000):
        super().__init__()
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d(1),  # global average pool to 1x1
        )
        self.embedding = nn.Linear(64, embedding_dim)
        self.classifier = nn.Linear(embedding_dim, num_classes)


model = FaceRecognitionModel()
# Use Lion for memory efficiency
# (Lion keeps a single momentum buffer, so its state is smaller than Adam's)
optimizer = torchium.optimizers.Lion(
model.parameters(),
lr=1e-4,
betas=(0.9, 0.99),
weight_decay=1e-2
)
# ArcFace loss for face recognition
# embedding_size must match FaceRecognitionModel's embedding_dim (512).
criterion = torchium.losses.ArcFaceMetricLoss(
num_classes=1000,
embedding_size=512,
margin=0.5,
scale=64
)
Contrastive Learning
SimCLR-style Training
class ContrastiveModel(nn.Module):
    """SimCLR-style projection head: three-layer MLP mapping backbone
    features into the contrastive embedding space."""

    def __init__(self, input_dim=2048, hidden_dim=512, output_dim=128):
        super().__init__()
        hidden_shapes = [(input_dim, hidden_dim), (hidden_dim, hidden_dim)]
        layers = []
        for n_in, n_out in hidden_shapes:
            layers += [nn.Linear(n_in, n_out), nn.ReLU()]
        layers.append(nn.Linear(hidden_dim, output_dim))
        self.projector = nn.Sequential(*layers)


model = ContrastiveModel()
# Use LARS for contrastive learning
optimizer = torchium.optimizers.LARS(
model.parameters(),
lr=1e-3,
momentum=0.9,
weight_decay=1e-4
)
# Contrastive loss
# NOTE(review): temperature scales similarity logits; confirm whether
# torchium's ContrastiveMetricLoss uses temperature and margin together.
criterion = torchium.losses.ContrastiveMetricLoss(
temperature=0.1,
margin=1.0
)
Multi-Task Learning
Uncertainty Weighting
Multi-Task Computer Vision
class MultiTaskVisionModel(nn.Module):
    """Shared conv backbone with three task heads: 10-way classification,
    scalar regression, and a 21-way segmentation head (per the listing)."""

    def __init__(self):
        super().__init__()
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d(1),
        )
        self.classifier = nn.Linear(64, 10)  # Classification
        self.regressor = nn.Linear(64, 1)  # Regression
        self.segmenter = nn.Linear(64, 21)  # Segmentation


model = MultiTaskVisionModel()
# Use PCGrad for gradient surgery
# NOTE(review): PCGrad implementations usually wrap a base optimizer rather
# than taking parameters directly — confirm torchium's PCGrad API.
optimizer = torchium.optimizers.PCGrad(
model.parameters(),
lr=1e-3
)
# Multi-task loss with uncertainty weighting
class MultiTaskVisionLoss(nn.Module):
    """Per-task losses (CE / MSE / Dice) combined through uncertainty
    weighting across the three tasks."""

    def __init__(self):
        super().__init__()
        self.uncertainty_loss = torchium.losses.UncertaintyWeightingLoss(num_tasks=3)
        self.cls_loss = torchium.losses.CrossEntropyLoss()
        self.reg_loss = torchium.losses.MSELoss()
        self.seg_loss = torchium.losses.DiceLoss()

    def forward(self, cls_pred, reg_pred, seg_pred, cls_target, reg_target, seg_target):
        """Return the uncertainty-weighted combination of the task losses."""
        task_losses = [
            self.cls_loss(cls_pred, cls_target),
            self.reg_loss(reg_pred, reg_target),
            self.seg_loss(seg_pred, seg_target),
        ]
        return self.uncertainty_loss(task_losses)


criterion = MultiTaskVisionLoss()
Domain-Specific Applications
Medical Imaging
Medical Image Analysis
class MedicalImageModel(nn.Module):
    """Small conv encoder for single-channel (grayscale) images with a
    two-logit binary classification head."""

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),  # grayscale medical images
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        self.classifier = nn.Linear(64, 2)  # binary classification


model = MedicalImageModel()
# Use SAM for better generalization in medical imaging
# NOTE(review): SAM requires a two-step (closure) update per batch — confirm
# torchium's SAM step() API before use.
optimizer = torchium.optimizers.SAM(
model.parameters(),
lr=1e-3,
rho=0.05
)
# Medical imaging specific loss
class MedicalImagingLoss(nn.Module):
    """Fixed 0.7/0.3 blend of focal and Dice losses."""

    def __init__(self):
        super().__init__()
        self.focal = torchium.losses.FocalLoss(alpha=0.25, gamma=2.0)
        self.dice = torchium.losses.DiceLoss(smooth=1e-5)

    def forward(self, pred, target):
        """Return 0.7 * focal + 0.3 * dice."""
        focal_term = self.focal(pred, target)
        dice_term = self.dice(pred, target)
        return 0.7 * focal_term + 0.3 * dice_term


criterion = MedicalImagingLoss()
Audio Processing
Audio Classification
class AudioModel(nn.Module):
    """Three-layer MLP classifier over fixed-size audio feature vectors."""

    def __init__(self, input_dim=128, num_classes=10):
        super().__init__()
        hidden_shapes = [(input_dim, 256), (256, 128)]
        layers = []
        for n_in, n_out in hidden_shapes:
            layers += [nn.Linear(n_in, n_out), nn.ReLU()]
        layers.append(nn.Linear(128, num_classes))
        self.net = nn.Sequential(*layers)


model = AudioModel()
# Use AdaBelief for audio processing
optimizer = torchium.optimizers.AdaBelief(
model.parameters(),
lr=1e-3
)
# Audio processing loss
# NOTE(review): confirm whether torchium's SpectralLoss expects raw waveforms
# or precomputed spectrograms as input.
criterion = torchium.losses.SpectralLoss()
Time Series
Time Series Forecasting
class TimeSeriesModel(nn.Module):
    """Single-layer LSTM followed by a linear head for forecasting."""

    def __init__(self, input_dim=10, hidden_dim=64, output_dim=1):
        super().__init__()
        # batch_first=True: inputs are (batch, seq, features).
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.classifier = nn.Linear(hidden_dim, output_dim)


model = TimeSeriesModel()
# Use AdamW for time series
optimizer = torchium.optimizers.AdamW(
model.parameters(),
lr=1e-3,
weight_decay=1e-4
)
# DTW loss for time series
# NOTE(review): soft-DTW variants are differentiable while plain DTW is not —
# confirm which variant torchium's DTWLoss implements.
criterion = torchium.losses.DTWLoss()
Best Practices by Domain
- Computer Vision:
Use Ranger or Lookahead for vision tasks
Combine multiple IoU losses for detection
Use perceptual losses for super resolution
Apply SAM for better generalization
- Natural Language Processing:
Use LAMB for large batch training
Apply label smoothing for better generalization
Use NovoGrad for transformer models
Consider gradient clipping for stability
- Generative Models:
Use different optimizers for G and D
Apply appropriate GAN loss variants
Use AdaBelief for stable VAE training
Consider beta scheduling for Beta-VAE
- Metric Learning:
Use Lion for memory efficiency
Apply ArcFace for face recognition
Use LARS for contrastive learning
Consider temperature scaling
- Multi-Task Learning:
Use PCGrad for gradient surgery
Apply uncertainty weighting
Use appropriate loss combinations
Monitor task-specific performance
- Domain-Specific:
Use SAM for medical imaging
Apply spectral losses for audio
Use DTW for time series
Consider domain-specific augmentations