Intermediate
MLflow Tracking
Master the MLflow Tracking API — log parameters, metrics, and artifacts, use autologging, and compare experiments.
Experiments and Runs
MLflow organizes tracking into two levels:
- Experiment: A named collection of runs, typically for a specific ML task (e.g., "customer-churn-prediction").
- Run: A single execution of training code within an experiment. Each run tracks parameters, metrics, artifacts, and tags.
Logging Parameters
Python — Logging parameters
import mlflow

# start_run() opens a run in the active experiment and ends it
# automatically when the context exits.
with mlflow.start_run():
    # Log parameters one at a time
    mlflow.log_param("learning_rate", 0.01)
    mlflow.log_param("batch_size", 32)
    mlflow.log_param("model_type", "gradient_boosting")

    # Log a whole dict of parameters in a single call
    params = {
        "n_estimators": 200,
        "max_depth": 8,
        "min_samples_split": 5,
        "subsample": 0.8,
    }
    mlflow.log_params(params)
Logging Metrics
Python — Logging metrics
with mlflow.start_run():
    # Single scalar metrics
    mlflow.log_metric("accuracy", 0.95)
    mlflow.log_metric("f1_score", 0.93)

    # Time-series metrics: one point per epoch, indexed by `step`
    for epoch in range(100):
        train_loss = train_one_epoch(model, train_loader)
        val_loss = evaluate(model, val_loader)
        mlflow.log_metric("train_loss", train_loss, step=epoch)
        mlflow.log_metric("val_loss", val_loss, step=epoch)

    # Several metrics in one call
    mlflow.log_metrics({
        "precision": 0.94,
        "recall": 0.92,
        "auc": 0.97,
    })
Logging Artifacts
Python — Logging artifacts
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

with mlflow.start_run():
    # Log a single file as an artifact of this run
    mlflow.log_artifact("config.yaml")

    # Log an entire directory, placed under the "figures" artifact path
    mlflow.log_artifacts("plots/", artifact_path="figures")

    # Build a plot, save it to disk, then log the saved image
    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots()
    ConfusionMatrixDisplay(cm).plot(ax=ax)
    fig.savefig("confusion_matrix.png")
    mlflow.log_artifact("confusion_matrix.png")

    # Write a text report, then log it
    with open("feature_importance.txt", "w") as f:
        for name, imp in zip(feature_names, importances):
            f.write(f"{name}: {imp:.4f}\n")
    mlflow.log_artifact("feature_importance.txt")
Autologging
MLflow can automatically log parameters, metrics, and models for supported frameworks:
Python — Autologging
# Enable autologging for every supported framework at once
mlflow.autolog()

# Or enable it per framework
mlflow.sklearn.autolog()
mlflow.pytorch.autolog()
mlflow.tensorflow.autolog()
mlflow.xgboost.autolog()
mlflow.lightgbm.autolog()

# Now just train normally - everything is logged automatically
from sklearn.ensemble import RandomForestClassifier

with mlflow.start_run():
    model = RandomForestClassifier(n_estimators=100, max_depth=10)
    model.fit(X_train, y_train)
    # Parameters, metrics, model, and feature importance are all logged!
Autolog captures: All constructor parameters, training metrics (accuracy, F1, etc.), the trained model artifact, feature importance plots, and even the model signature. It's the fastest way to start tracking.
Custom Tags
Python — Using tags
with mlflow.start_run():
    # Tags attach searchable, free-form metadata to the run
    mlflow.set_tag("team", "fraud-detection")
    mlflow.set_tag("data_version", "v2.3")
    mlflow.set_tag("environment", "staging")

    # Set several tags in a single call
    mlflow.set_tags({
        "model_type": "ensemble",
        "feature_set": "v3",
        "priority": "high",
    })
Searching and Comparing Runs
Python — Searching runs programmatically
import mlflow

# search_runs returns a pandas DataFrame with one row per matching run
runs = mlflow.search_runs(
    experiment_names=["customer-churn"],
    filter_string="metrics.accuracy > 0.9 AND params.model_type = 'gradient_boosting'",
    order_by=["metrics.f1_score DESC"],
    max_results=10,
)

# Inspect selected columns
print(runs[["run_id", "params.model_type", "metrics.accuracy", "metrics.f1_score"]])

# First row is the best run thanks to the DESC ordering above
best_run = runs.iloc[0]
print(f"Best run: {best_run.run_id}")
print(f" Accuracy: {best_run['metrics.accuracy']:.4f}")
print(f" F1 Score: {best_run['metrics.f1_score']:.4f}")
Framework Examples
scikit-learn
Python — MLflow with scikit-learn
import mlflow

mlflow.sklearn.autolog()

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

with mlflow.start_run():
    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("classifier", LogisticRegression(C=1.0, max_iter=200)),
    ])
    pipe.fit(X_train, y_train)
    # Everything logged automatically
PyTorch
Python — MLflow with PyTorch
import mlflow
import torch
import torch.nn as nn

mlflow.pytorch.autolog()

with mlflow.start_run():
    model = nn.Sequential(
        nn.Linear(784, 256),
        nn.ReLU(),
        nn.Linear(256, 10),
    )
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(10):
        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()
            output = model(batch_x)
            loss = criterion(output, batch_y)
            loss.backward()
            optimizer.step()
        # Log the final batch's loss once per epoch (step=epoch keys the
        # metric series to the epoch counter)
        mlflow.log_metric("train_loss", loss.item(), step=epoch)
TensorFlow / Keras
Python — MLflow with TensorFlow
import mlflow
import tensorflow as tf

mlflow.tensorflow.autolog()

with mlflow.start_run():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=10, validation_split=0.2)
    # All metrics, params, and model logged automatically
Lilly Tech Systems