Intermediate

Model Deployment

Serve models via FastAPI in Docker with health checks and versioning.

FastAPI Model Server

# src/serve.py
from fastapi import FastAPI
from pydantic import BaseModel
import joblib
import numpy as np
from typing import List

app = FastAPI(title="ML Model Server")
model = None

# Load the model once at startup so every request reuses the same object
@app.on_event("startup")
def load_model():
    global model
    model = joblib.load("models/model.joblib")
    print("Model loaded")

class PredictRequest(BaseModel):
    features: List[List[float]]

class PredictResponse(BaseModel):
    predictions: List[int]
    probabilities: List[List[float]]

@app.get("/health")
def health():
    return {"status": "ok", "model_loaded": model is not None}

@app.post("/predict", response_model=PredictResponse)
def predict(req: PredictRequest):
    # Accept a batch of feature rows; return class labels and per-class probabilities
    X = np.array(req.features)
    preds = model.predict(X).tolist()
    probs = model.predict_proba(X).tolist()
    return PredictResponse(predictions=preds, probabilities=probs)

@app.get("/model/info")
def model_info():
    return {
        "type": type(model).__name__,
        "n_features": model.n_features_in_,
        "classes": model.classes_.tolist(),
    }

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
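
The "versioning" half can be handled by stamping the container with a version string and reporting it from /model/info, so clients can confirm which artifact is live. A minimal sketch that replaces the /model/info handler above, assuming a MODEL_VERSION environment variable (the name is illustrative) set at build or deploy time:

import os

MODEL_VERSION = os.getenv("MODEL_VERSION", "unversioned")

@app.get("/model/info")
def model_info():
    return {
        "type": type(model).__name__,
        "version": MODEL_VERSION,  # e.g. a git tag or model-registry run id
        "n_features": model.n_features_in_,
        "classes": model.classes_.tolist(),
    }

Set it with ENV MODEL_VERSION=1.0.0 in the Dockerfile or docker run -e MODEL_VERSION=1.0.0.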

Dockerfile

FROM python:3.11-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY src/ src/
COPY models/ models/
EXPOSE 8000
CMD ["uvicorn", "src.serve:app", "--host", "0.0.0.0", "--port", "8000"]
# Build and run
docker build -t mlops-model .
docker run -p 8000:8000 mlops-model
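
Docker itself can also poll the /health endpoint. python:3.11-slim does not ship with curl, so one option is a HEALTHCHECK that uses the standard library; a sketch to append to the Dockerfile (intervals are illustrative):

# Mark the container unhealthy if /health stops responding
HEALTHCHECK --interval=30s --timeout=3s --retries=3 \
  CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"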

# Test
curl -X POST http://localhost:8000/predict \
  -H "Content-Type: application/json" \
  -d '{"features": [[1.0, 2.0, 3.0, 4.0]]}'
💡 Production: Use Gunicorn with multiple Uvicorn workers: gunicorn src.serve:app -w 4 -k uvicorn.workers.UvicornWorker
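
In the Dockerfile this means swapping the CMD line; a sketch, assuming gunicorn is listed in requirements.txt:

CMD ["gunicorn", "src.serve:app", "-w", "4", "-k", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000"]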