Intermediate

Building Models with nn.Module

Learn PyTorch's module system for building neural networks — from simple sequential models to complex custom architectures with shared layers and skip connections.

The nn.Module Base Class

Every neural network in PyTorch inherits from nn.Module. You define layers in __init__ and the forward pass in forward():

Python
import torch
import torch.nn as nn

class SimpleNet(nn.Module):
    """A two-layer fully connected classifier with ReLU and dropout."""

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Assigning modules as attributes registers their parameters
        # with nn.Module automatically.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Hidden representation: linear -> ReLU -> dropout.
        hidden = self.dropout(self.relu(self.fc1(x)))
        # Raw class scores (logits) — no softmax applied here.
        return self.fc2(hidden)

# Create an instance
model = SimpleNet(input_size=784, hidden_size=128, num_classes=10)
print(model)

nn.Sequential for Simple Models

Python
# Quick model building without a custom class
model = nn.Sequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(128, 10)
)

Common Layers

| Layer | Purpose | Example |
| --- | --- | --- |
| nn.Linear | Fully connected layer | nn.Linear(in, out) |
| nn.Conv2d | 2D convolution | nn.Conv2d(3, 64, kernel_size=3) |
| nn.LSTM | Long short-term memory | nn.LSTM(input_size, hidden_size) |
| nn.BatchNorm2d | Batch normalization for CNNs | nn.BatchNorm2d(num_features) |
| nn.Embedding | Lookup table for embeddings | nn.Embedding(vocab_size, dim) |
| nn.Dropout | Regularization | nn.Dropout(p=0.5) |

Custom Models with Skip Connections

Python
class ResidualBlock(nn.Module):
    """Conv -> BN -> ReLU -> Conv -> BN with an identity skip connection."""

    def __init__(self, channels):
        super().__init__()
        # Both 3x3 convs use padding=1, so spatial size is preserved and
        # the input can be added back without any reshaping.
        self.block = nn.Sequential(
            nn.Conv2d(channels, channels, 3, padding=1),
            nn.BatchNorm2d(channels),
            nn.ReLU(),
            nn.Conv2d(channels, channels, 3, padding=1),
            nn.BatchNorm2d(channels)
        )
        self.relu = nn.ReLU()

    def forward(self, x):
        # Skip connection: add the input onto the transformed features,
        # then apply the final non-linearity.
        return self.relu(self.block(x) + x)

class MiniResNet(nn.Module):
    """Small ResNet-style classifier: stem conv, two residual blocks,
    global average pooling, and a linear head."""

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, 3, padding=1)  # stem: 3 input channels -> 64
        self.res1 = ResidualBlock(64)
        self.res2 = ResidualBlock(64)
        # Adaptive pooling collapses any spatial size down to 1x1.
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        features = torch.relu(self.conv1(x))
        features = self.res2(self.res1(features))
        pooled = self.pool(features).flatten(1)  # (N, 64, 1, 1) -> (N, 64)
        return self.fc(pooled)

Parameter Management

Python
model = SimpleNet(784, 128, 10)

# View all parameters (the weights and biases registered by each layer)
for name, param in model.named_parameters():
    print(f"{name}: {param.shape}")

# Count total vs. trainable parameters
total = sum(p.numel() for p in model.parameters())
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total: {total:,}, Trainable: {trainable:,}")

# Freeze specific layers: frozen parameters receive no gradients, so an
# optimizer will leave them unchanged during training.
for param in model.fc1.parameters():
    param.requires_grad = False

# Move the model to the GPU when one is available, falling back to the
# CPU so the same script also runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)
Best Practice: Always call model.train() before training and model.eval() before evaluation/inference. This toggles layers like Dropout and BatchNorm between training and evaluation behavior.

Next Up: Training Loop

Now let's write the training loop to actually train these models with optimizers, loss functions, and learning rate schedulers.

Next: Training Loop →