Category 1: Regression & Classification
The first exam category tests your ability to build dense neural networks for regression and classification tasks. You must handle data normalization, choose correct loss functions, and achieve target accuracy thresholds. This lesson includes complete practice models you can run in a Jupyter notebook.
What the Exam Tests
In this category, you will receive a dataset and must build a model that achieves a specific accuracy or loss threshold. The tasks progress from simple linear regression to multi-class classification with multiple features.
You will need to normalize input features, either with tf.keras.layers.Normalization or with manual min-max/standard scaling.
Practice Model 1: Simple Linear Regression
Build a model that learns the relationship between a single input feature and a continuous output. This is the simplest possible exam task.
import tensorflow as tf
import numpy as np

# ---- Synthetic data: y = 2x + 1 plus Gaussian noise ----
np.random.seed(42)
x_train = np.random.uniform(low=-10, high=10, size=1000).astype(np.float32)
y_train = 2 * x_train + 1 + np.random.normal(0, 0.5, 1000).astype(np.float32)
x_val = np.random.uniform(low=-10, high=10, size=200).astype(np.float32)
y_val = 2 * x_val + 1 + np.random.normal(0, 0.5, 200).astype(np.float32)

# ---- Model: two small hidden layers, linear output ----
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(16, activation='relu', input_shape=(1,)))
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(1))  # regression head: no activation

# ---- Compile: MSE loss, track MAE ----
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

# ---- Train with early stopping on validation loss ----
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop],
)

# ---- Evaluate on the held-out split ----
test_loss, test_mae = model.evaluate(x_val, y_val)
print(f"Validation MAE: {test_mae:.4f}")

# ---- Save for exam submission ----
model.save('linear_regression.h5')
Practice Model 2: Multi-Feature Regression
A more realistic exam task with multiple input features. The key challenge is proper data normalization.
import tensorflow as tf
import numpy as np

# ---- Simulated housing data (like a simplified exam dataset) ----
np.random.seed(42)
n_samples = 2000

# Features: square_feet, bedrooms, age, distance_to_center
raw_columns = [
    np.random.uniform(500, 5000, n_samples),   # square feet
    np.random.randint(1, 6, n_samples),        # bedroom count
    np.random.uniform(0, 50, n_samples),       # age in years
    np.random.uniform(0.5, 30, n_samples),     # distance in km
]
features = np.column_stack(raw_columns).astype(np.float32)

# Price is a linear combination of the features plus noise.
prices = (
    200 * features[:, 0]       # $200 per square foot
    + 50000 * features[:, 1]   # $50k per bedroom
    - 2000 * features[:, 2]    # depreciation with age
    - 5000 * features[:, 3]    # penalty for distance from center
    + np.random.normal(0, 20000, n_samples)
).astype(np.float32)

# ---- Train/validation split (80/20, data already unordered) ----
split = int(0.8 * n_samples)
x_train, x_val = features[:split], features[split:]
y_train, y_val = prices[:split], prices[split:]

# ---- CRITICAL: normalize features ----
# The Normalization layer learns mean/variance from the training split only.
normalizer = tf.keras.layers.Normalization()
normalizer.adapt(x_train)

# ---- Model: normalization first, then a funnel of dense layers ----
model = tf.keras.Sequential()
model.add(normalizer)
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(32, activation='relu'))
model.add(tf.keras.layers.Dense(1))

model.compile(optimizer='adam', loss='mse', metrics=['mae'])

early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_mae',
    patience=10,
    restore_best_weights=True,
)
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=100,
    batch_size=32,
    callbacks=[early_stop],
)

model.save('multi_feature_regression.h5')
Practice Model 3: Binary Classification
Build a model that classifies data into two categories. Pay attention to the output activation and loss function.
import tensorflow as tf
import numpy as np

# ---- Generate binary classification data: two overlapping clusters ----
np.random.seed(42)
n_samples = 2000
half = n_samples // 2

class_0 = np.random.randn(half, 4).astype(np.float32) + np.array([1, 1, 0, 0])
class_1 = np.random.randn(half, 4).astype(np.float32) + np.array([-1, -1, 1, 1])
x_data = np.vstack([class_0, class_1])
y_data = np.concatenate([
    np.zeros(half, dtype=np.float32),
    np.ones(half, dtype=np.float32),
])

# ---- Shuffle, then split 80/20 ----
indices = np.random.permutation(n_samples)
x_data, y_data = x_data[indices], y_data[indices]
split = int(0.8 * n_samples)
x_train, x_val = x_data[:split], x_data[split:]
y_train, y_val = y_data[:split], y_data[split:]

# ---- Model: dropout between dense layers, single sigmoid output ----
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(32, activation='relu', input_shape=(4,)))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))  # SIGMOID for binary

# ---- CRITICAL: binary_crossentropy for binary classification ----
# (NOT categorical_crossentropy — that pairs with softmax outputs.)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
)
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop],
)

val_loss, val_acc = model.evaluate(x_val, y_val)
print(f"Validation Accuracy: {val_acc:.4f}")
model.save('binary_classification.h5')
Practice Model 4: Multi-Class Classification
Classify data into 3 or more categories. This is the most common task type in the exam.
import tensorflow as tf
import numpy as np

# ---- Multi-class classification (like iris or fashion) ----
np.random.seed(42)
n_samples = 3000
n_classes = 5
per_class = n_samples // n_classes

# One Gaussian cluster per class, each around a random center.
feature_chunks = []
label_chunks = []
for label in range(n_classes):
    center = np.random.randn(6) * 3
    cluster = np.random.randn(per_class, 6).astype(np.float32) + center
    feature_chunks.append(cluster)
    label_chunks.append(np.full(per_class, label, dtype=np.int32))
x_data = np.vstack(feature_chunks).astype(np.float32)
y_data = np.concatenate(label_chunks)

# ---- Shuffle, then split 80/20 ----
indices = np.random.permutation(len(y_data))
x_data, y_data = x_data[indices], y_data[indices]
split = int(0.8 * len(y_data))
x_train, x_val = x_data[:split], x_data[split:]
y_train, y_val = y_data[:split], y_data[split:]

# ---- Normalize using statistics from the training split only ----
normalizer = tf.keras.layers.Normalization()
normalizer.adapt(x_train)

# ---- Model: normalization first, dropout for regularization ----
model = tf.keras.Sequential()
model.add(normalizer)
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(n_classes, activation='softmax'))  # SOFTMAX for multi-class

# ---- CRITICAL: sparse_categorical_crossentropy when labels are integers ----
# Use categorical_crossentropy instead when labels are one-hot encoded.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
)
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop],
)

val_loss, val_acc = model.evaluate(x_val, y_val)
print(f"Validation Accuracy: {val_acc:.4f}")
model.save('multiclass_classification.h5')
Quick Reference: Loss Functions & Activations
# Cheat sheet for the exam - memorize this table
# Task                    | Output Activation | Loss Function
# ----------------------- | ----------------- | ----------------------------
# Regression              | None (linear)     | 'mse' or 'mae'
# Binary classification   | 'sigmoid'         | 'binary_crossentropy'
# Multi-class (int labels)| 'softmax'         | 'sparse_categorical_crossentropy'
# Multi-class (one-hot)   | 'softmax'         | 'categorical_crossentropy'
# Common mistakes on the exam:
# 1. Using softmax + sparse_categorical for binary (use sigmoid + binary_crossentropy)
# 2. Forgetting to normalize input features
# 3. Using 'accuracy' metric with regression (use 'mae' or 'mse' instead)
# 4. Setting too few epochs (model underfits)
# 5. Not using validation_split or validation_data (you can't monitor overfitting)
Key Takeaways
- Always normalize input features — use tf.keras.layers.Normalization adapted on the training data
- Match your output activation to the task: None for regression, sigmoid for binary, softmax for multi-class
- Match your loss function to the label format: sparse_categorical_crossentropy for integer labels
- Use EarlyStopping with restore_best_weights=True to prevent overfitting
- Save your model as .h5 — this is the format the exam plugin expects
- Start simple (fewer layers) and add complexity only if needed — overengineering wastes exam time
Lilly Tech Systems