Chatbot Architecture Patterns (Beginner)

Before writing a single line of chatbot code, you need to choose the right architecture. This lesson covers the three dominant patterns — intent-based, LLM-based, and hybrid — with honest trade-offs, production considerations, and a reference architecture you can adapt for any chatbot project.

Three Architecture Patterns

| Pattern | How It Works | Best For | Latency | Cost per Message |
|---|---|---|---|---|
| Intent-Based (NLU) | Classify user intent → extract slots → execute handler | Structured workflows (order status, booking) | 50-200ms | ~$0.001 |
| LLM-Based | Send conversation history to LLM → generate response | Open-ended Q&A, creative tasks | 1-10s | $0.01-0.10 |
| Hybrid | Route by confidence: high-confidence intents go to handlers, fallback to LLM | Production systems needing both speed and flexibility | Varies | $0.002-0.05 |

Intent-Based Architecture

The traditional approach used by Dialogflow, Lex, and Rasa. Fast, predictable, and cheap — but limited to predefined flows.

# Intent-based chatbot router (production pattern)
import re
from dataclasses import dataclass, field
from typing import Callable, Dict, List, Optional

@dataclass
class Intent:
    """Declarative spec for one supported user goal.

    The router matches *patterns* against incoming text, extracts
    *required_slots*, and invokes *handler* once every slot is filled.
    """
    name: str                   # Unique identifier; used as the router's dict key
    patterns: List[str]         # Training phrases, matched as lowercase substrings
    handler: Callable           # Function to execute; receives the filled slot dict, returns reply text
    required_slots: List[str]   # Parameters to extract before the handler may run
    confidence_threshold: float = 0.85  # Minimum classification confidence to trigger this intent

@dataclass
class ConversationState:
    """Mutable per-session dialogue state tracked by IntentRouter.

    Each instance owns its own `slots`/`context` dicts via
    `default_factory` (a plain `= {}` default would be shared across
    every instance). `__post_init__` is kept so callers that explicitly
    pass `slots=None` / `context=None` still get fresh dicts.
    """
    session_id: str
    current_intent: Optional[str] = None    # Intent currently awaiting slot filling, if any
    slots: Dict[str, str] = field(default_factory=dict)   # Slot values collected so far
    turn_count: int = 0                     # Number of messages processed in this session
    context: Dict = field(default_factory=dict)           # Extra per-session context

    def __post_init__(self):
        # Backward compat: coerce an explicit None into an empty dict.
        self.slots = self.slots or {}
        self.context = self.context or {}

class IntentRouter:
    """Production intent router with slot filling.

    Message lifecycle: look up or create the session, continue any
    in-progress slot filling, otherwise classify the message and either
    answer immediately or start prompting for missing slots.
    """

    def __init__(self):
        self.intents: Dict[str, Intent] = {}              # intent name -> Intent spec
        self.sessions: Dict[str, ConversationState] = {}  # session id -> dialogue state

    def register_intent(self, intent: Intent):
        """Register *intent*; a later registration with the same name replaces it."""
        self.intents[intent.name] = intent

    def classify(self, text: str) -> tuple[Optional[str], float]:
        """Classify a user message to (intent name, confidence).

        Naive substring scoring: confidence is the fraction of the message
        covered by the matched pattern. Returns (None, 0.0) when nothing
        matches. In production, replace with a trained NLU model
        (Rasa, BERT, etc.).
        """
        text_lower = text.lower()
        if not text_lower:
            # Empty input can never match; also avoids division by zero below.
            return None, 0.0
        best_intent: Optional[str] = None
        best_score = 0.0
        for name, intent in self.intents.items():
            for pattern in intent.patterns:
                if pattern.lower() in text_lower:
                    score = len(pattern) / len(text_lower)
                    if score > best_score:
                        best_intent, best_score = name, min(score, 1.0)
        return best_intent, best_score

    def extract_slots(self, text: str, intent: Intent) -> Dict[str, str]:
        """Extract whichever of the intent's required slots appear in *text*.

        Demo extractors only (order IDs of the form ORD-NNNNNN, and
        emails); extend with real entity extraction for other slot types.
        """
        slots: Dict[str, str] = {}
        # Example: extract order ID pattern
        order_match = re.search(r'ORD-\d{6}', text)
        if order_match and 'order_id' in intent.required_slots:
            slots['order_id'] = order_match.group()
        # Example: extract email
        email_match = re.search(r'[\w.-]+@[\w.-]+\.\w+', text)
        if email_match and 'email' in intent.required_slots:
            slots['email'] = email_match.group()
        return slots

    def process(self, session_id: str, message: str) -> str:
        """Handle one user message and return the bot's reply text."""
        # Get or create session
        if session_id not in self.sessions:
            self.sessions[session_id] = ConversationState(session_id=session_id)
        state = self.sessions[session_id]
        state.turn_count += 1

        # If we're in slot-filling mode, try to fill missing slots.
        # NOTE: the message is NOT re-classified here, so a user cannot
        # switch intents mid-flow; they can only answer the slot prompt.
        if state.current_intent:
            intent = self.intents[state.current_intent]
            state.slots.update(self.extract_slots(message, intent))
            missing = [s for s in intent.required_slots if s not in state.slots]
            if missing:
                return f"I still need your {missing[0]}. Could you provide it?"
            # All slots filled - execute handler, then reset the flow state.
            response = intent.handler(state.slots)
            state.current_intent = None
            state.slots = {}
            return response

        # Classify a new intent; each intent enforces its own threshold.
        intent_name, confidence = self.classify(message)
        if intent_name and confidence >= self.intents[intent_name].confidence_threshold:
            intent = self.intents[intent_name]
            slots = self.extract_slots(message, intent)
            missing = [s for s in intent.required_slots if s not in slots]
            if missing:
                # Park the intent and ask for the first missing slot.
                state.current_intent = intent_name
                state.slots = slots
                return f"Sure, I can help with that. What is your {missing[0]}?"
            return intent.handler(slots)

        return "I'm not sure I understand. Could you rephrase that?"


# --- Usage Example ---
def handle_order_status(slots: Dict) -> str:
    """Report shipping status for the order identified in *slots*."""
    order_id = slots["order_id"]
    return f"Order {order_id}: Shipped, arriving tomorrow."

def handle_refund(slots: Dict) -> str:
    """Start a refund and point the user at their confirmation email."""
    order_id = slots["order_id"]
    email = slots["email"]
    return f"Refund initiated for order {order_id}. Check {email} for confirmation."

router = IntentRouter()

# Wire up the demo intents (fields are positional: name, patterns,
# handler, required_slots).
router.register_intent(
    Intent(
        "order_status",
        ["where is my order", "track order", "order status"],
        handle_order_status,
        ["order_id"],
    )
)
router.register_intent(
    Intent(
        "refund",
        ["refund", "return my order", "money back"],
        handle_refund,
        ["order_id", "email"],
    )
)

LLM-Based Architecture

Send the full conversation history to an LLM. Simple to build, handles open-ended queries well, but requires careful prompt engineering and cost management.

# LLM-based chatbot with structured system prompt
import openai
from typing import List, Dict

class LLMChatbot:
    """Production LLM chatbot with per-session conversation management."""

    def __init__(self, model: str = "gpt-4o", max_history: int = 20):
        self.client = openai.OpenAI()
        self.model = model
        self.max_history = max_history  # max messages kept per session (count, not tokens)
        self.sessions: Dict[str, List[Dict]] = {}
        self.system_prompt = """You are a customer support agent for Acme Corp.

RULES:
1. Only answer questions about Acme products and services
2. Never make up order information - always use the lookup tool
3. If you cannot help, offer to connect to a human agent
4. Keep responses under 3 sentences unless the user asks for detail
5. Never share internal policies or pricing formulas

TONE: Professional, friendly, concise."""

    def get_history(self, session_id: str) -> List[Dict]:
        """Return the mutable message list for *session_id*, creating it on first use."""
        return self.sessions.setdefault(session_id, [])

    def trim_history(self, history: List[Dict]) -> List[Dict]:
        """Drop the oldest messages once the list exceeds the history budget.

        The system prompt is not part of *history* (it is prepended per
        request in chat()), so trimming never removes it.
        """
        excess = len(history) - self.max_history
        if excess > 0:
            return history[excess:]
        return history

    def chat(self, session_id: str, user_message: str) -> str:
        """Append the user turn, call the model, record and return the reply."""
        transcript = self.get_history(session_id)
        transcript.append({"role": "user", "content": user_message})
        transcript = self.trim_history(transcript)

        payload = [{"role": "system", "content": self.system_prompt}] + transcript

        completion = self.client.chat.completions.create(
            model=self.model,
            messages=payload,
            temperature=0.3,       # Lower = more consistent
            max_tokens=500,        # Cap response length
            top_p=0.9
        )

        reply = completion.choices[0].message.content
        transcript.append({"role": "assistant", "content": reply})
        self.sessions[session_id] = transcript
        return reply

Hybrid Architecture (Recommended for Production)

Most production chatbots use a hybrid: route high-confidence intents to fast deterministic handlers, and fall back to an LLM for everything else.

# Hybrid router: intent-first with LLM fallback
class HybridChatbot:
    """Production hybrid: intent router + LLM fallback.

    Routing policy:
      1. If the intent router is mid slot-filling for this session, stay on
         the intent path (otherwise a bare slot answer like "ORD-123456"
         classifies with low confidence and gets misrouted to the LLM,
         breaking the multi-turn flow).
      2. Otherwise classify; high-confidence messages go to the fast
         deterministic handlers.
      3. Everything else falls back to the LLM.
    """

    def __init__(self, intent_router: "IntentRouter", llm_chatbot: "LLMChatbot",
                 confidence_threshold: float = 0.80):
        """
        Args:
            intent_router: Deterministic router for structured intents.
            llm_chatbot: LLM fallback for open-ended messages.
            confidence_threshold: Minimum classification confidence to take
                the intent path. NOTE: individual intents may enforce a
                stricter threshold of their own inside IntentRouter.process.
        """
        self.intent_router = intent_router
        self.llm = llm_chatbot
        self.confidence_threshold = confidence_threshold

    def process(self, session_id: str, message: str) -> dict:
        """Route one user message; return the response plus routing metadata."""
        # Step 0: continue an in-progress slot-filling conversation.
        state = self.intent_router.sessions.get(session_id)
        if state is not None and state.current_intent:
            active_intent = state.current_intent  # capture before process() may clear it
            response = self.intent_router.process(session_id, message)
            return {
                "response": response,
                "source": "intent",
                "intent": active_intent,
                "confidence": 1.0,  # explicit continuation, not a fresh classification
                "latency_budget": "fast"
            }

        # Step 1: Try intent classification
        intent_name, confidence = self.intent_router.classify(message)
        if intent_name and confidence >= self.confidence_threshold:
            response = self.intent_router.process(session_id, message)
            return {
                "response": response,
                "source": "intent",
                "intent": intent_name,
                "confidence": confidence,
                "latency_budget": "fast"   # typically <200ms
            }

        # Step 2: Fall back to LLM
        response = self.llm.chat(session_id, message)
        return {
            "response": response,
            "source": "llm",
            "intent": None,
            "confidence": 0.0,
            "latency_budget": "standard"   # typically 1-5s
        }

Session and Multi-Turn Context Management

Every production chatbot needs session management. Sessions track who the user is, what they've said, and where they are in a conversation flow.

# Production session store (Redis-backed)
import json
import time
from typing import Optional

class SessionStore:
    """Redis-backed session store for production chatbots."""

    def __init__(self, redis_client, ttl_seconds: int = 3600):
        self.redis = redis_client
        self.ttl = ttl_seconds  # sliding expiry window per session

    @staticmethod
    def _key(session_id: str) -> str:
        # Single place that defines the Redis key layout.
        return f"chat:session:{session_id}"

    def get_session(self, session_id: str) -> Optional[dict]:
        """Load a session, refreshing its last-accessed time (sliding TTL)."""
        raw = self.redis.get(self._key(session_id))
        if not raw:
            return None
        session = json.loads(raw)
        session["last_accessed"] = time.time()
        self.save_session(session_id, session)
        return session

    def create_session(self, session_id: str, user_id: str,
                       channel: str) -> dict:
        """Create, persist, and return a fresh session skeleton."""
        session = {
            "session_id": session_id,
            "user_id": user_id,
            "channel": channel,
            "created_at": time.time(),
            "last_accessed": time.time(),
            "turn_count": 0,
            "messages": [],
            "context": {},            # Extracted entities, preferences
            "active_intent": None,
            "slots": {}
        }
        self.save_session(session_id, session)
        return session

    def save_session(self, session_id: str, session: dict):
        """Serialize and write the session with a fresh TTL."""
        payload = json.dumps(session)
        self.redis.setex(self._key(session_id), self.ttl, payload)

    def add_message(self, session_id: str, role: str, content: str,
                    metadata: dict = None):
        """Append one message to the transcript and bump the turn counter.

        Silently does nothing if the session does not exist (or expired).
        """
        session = self.get_session(session_id)
        if not session:
            return
        entry = {
            "role": role,
            "content": content,
            "timestamp": time.time(),
            "metadata": metadata or {}
        }
        session["messages"].append(entry)
        session["turn_count"] += 1
        self.save_session(session_id, session)

Architecture Decision Framework

Choose Intent-Based when: You have fewer than 50 well-defined flows, need sub-200ms latency, want deterministic behavior, or are on a tight budget. Examples: order tracking, appointment booking, FAQ bots.
Choose LLM-Based when: Queries are open-ended, you need to reason over documents/knowledge bases, the bot must handle unpredictable questions, or you want rapid iteration without retraining. Examples: knowledge assistants, coding helpers, creative writing bots.
Choose Hybrid when: You need both structured workflows AND open-ended capability. This is the most common production pattern. Route high-confidence intents to fast handlers, fall back to LLM for everything else. Examples: customer support, internal IT helpdesks, e-commerce assistants.