Intermediate
Model Deployment
Serve models via FastAPI in Docker with health checks and versioning.
FastAPI Model Server
# src/serve.py
import logging
from typing import List

import joblib
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
app = FastAPI(title="ML Model Server")
# Module-level model handle; populated by the startup hook, None until then.
model = None
@app.on_event("startup")
def load_model():
    """Load the serialized model from disk when the server starts.

    Populates the module-level ``model`` global. Any exception here
    aborts startup, which is preferable to serving without a model.
    NOTE(review): ``on_event`` is deprecated in recent FastAPI in favor
    of lifespan handlers; kept here so the module interface is unchanged.
    """
    global model
    model = joblib.load("models/model.joblib")
    # Log through the logging framework instead of print() so the message
    # respects the server's log configuration and formatting.
    logging.getLogger(__name__).info("Model loaded")
class PredictRequest(BaseModel):
    """Request body for /predict: a batch of feature rows (2-D list of floats)."""

    # One inner list per sample; width must match the trained model's feature count.
    features: List[List[float]]
class PredictResponse(BaseModel):
    """Response body for /predict: one prediction and one probability vector per row."""

    # Predicted class label per input row.
    predictions: List[int]
    # Per-row class probabilities, ordered as in model.classes_.
    probabilities: List[List[float]]
@app.get("/health")
def health():
    """Liveness/readiness probe: reports whether the model global is set."""
    loaded = model is not None
    return {"status": "ok", "model_loaded": loaded}
@app.post("/predict", response_model=PredictResponse)
def predict(req: PredictRequest):
    """Run batch inference on the loaded model.

    Args:
        req: batch of feature rows; each inner list must match the
            model's expected feature count.

    Returns:
        PredictResponse with one prediction and one probability vector
        per input row.

    Raises:
        HTTPException: 503 if the model is not loaded yet; 400 if the
            feature matrix has the wrong shape for the model.
    """
    if model is None:
        # Startup hook has not run (or failed); previously this crashed
        # with AttributeError and surfaced as an opaque 500.
        raise HTTPException(status_code=503, detail="Model not loaded")
    X = np.array(req.features)
    try:
        preds = model.predict(X).tolist()
        probs = model.predict_proba(X).tolist()
    except ValueError as exc:
        # Ragged rows / wrong feature count is a client error, not a 500.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return PredictResponse(predictions=preds, probabilities=probs)
@app.get("/model/info")
def model_info():
    """Return basic metadata about the loaded model.

    Returns:
        dict with the model class name, expected feature count, and
        class labels.

    Raises:
        HTTPException: 503 if the model has not been loaded yet
            (previously this crashed with AttributeError on ``None``).
    """
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")
    return {
        "type": type(model).__name__,
        "n_features": model.n_features_in_,
        "classes": model.classes_.tolist(),
    }
if __name__ == "__main__":
    # Dev-only entry point; the Dockerfile CMD runs uvicorn directly.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
Dockerfile
FROM python:3.11-slim
WORKDIR /app
# Copy the dependency manifest first so the pip layer is cached until
# requirements.txt actually changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY src/ src/
COPY models/ models/
EXPOSE 8000
# Container-level probe against the app's /health endpoint; uses stdlib
# urllib so no curl is required in the slim image. urlopen raises (non-zero
# exit) on connection failure or HTTP error.
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s \
  CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health', timeout=2)"
CMD ["uvicorn", "src.serve:app", "--host", "0.0.0.0", "--port", "8000"]
# Build the image and run it, mapping container port 8000 to the host
docker build -t mlops-model .
docker run -p 8000:8000 mlops-model
# Smoke-test the /predict endpoint with a single 4-feature row
curl -X POST http://localhost:8000/predict \
  -H "Content-Type: application/json" \
  -d '{"features": [[1.0, 2.0, 3.0, 4.0]]}'
Production: run Gunicorn as the process manager with multiple Uvicorn workers (replacing the single-process Dockerfile CMD):
gunicorn src.serve:app -w 4 -k uvicorn.workers.UvicornWorker
Lilly Tech Systems