Intermediate

Implement Caches

"Design an LRU cache" is the single most asked system implementation question in coding interviews. This lesson covers four cache implementations: LRU with O(1) operations, LFU for frequency-based eviction, TTL for time-based expiration, and write-through for consistency with a backing store.

Problem 1: LRU Cache

Implement a Least Recently Used cache where both get and put operations run in O(1) time. This is LeetCode #146 and appears in interviews at Google, Meta, Amazon, and Microsoft.

from collections import OrderedDict
from typing import Any, Optional


class LRUCache:
    """Least Recently Used cache with O(1) get and put.

    An OrderedDict plays the role of a hash map fused with a doubly
    linked list: lookups are O(1), move_to_end() promotes an entry to
    most-recently-used in O(1), and popitem(last=False) removes the
    least-recently-used entry in O(1).
    """

    def __init__(self, capacity: int):
        """
        Args:
            capacity: Maximum number of entries; must be positive.

        Raises:
            ValueError: If capacity is not positive.
        """
        if capacity <= 0:
            raise ValueError("Capacity must be positive")
        self._capacity = capacity
        self._cache: OrderedDict = OrderedDict()
        self._hits = 0    # successful get() calls
        self._misses = 0  # failed get() calls

    def get(self, key: str) -> Optional[Any]:
        """Get value by key. Returns None on miss."""
        try:
            value = self._cache[key]
        except KeyError:
            self._misses += 1
            return None
        self._hits += 1
        self._cache.move_to_end(key)  # promote to most-recently-used
        return value

    def put(self, key: str, value: Any) -> Optional[tuple]:
        """Insert or update key-value pair.

        Returns the evicted (key, value) tuple if eviction occurred,
        otherwise None.
        """
        if key in self._cache:
            # Existing key: refresh recency and overwrite — never evicts.
            self._cache.move_to_end(key)
            self._cache[key] = value
            return None

        victim = None
        if len(self._cache) >= self._capacity:
            victim = self._cache.popitem(last=False)  # drop LRU entry
        self._cache[key] = value
        return victim

    def delete(self, key: str) -> bool:
        """Remove a key from the cache. Returns True if key existed."""
        try:
            del self._cache[key]
        except KeyError:
            return False
        return True

    def size(self) -> int:
        """Number of entries currently cached."""
        return len(self._cache)

    def hit_rate(self) -> float:
        """Fraction of get() calls that were hits (0.0 before any get)."""
        lookups = self._hits + self._misses
        if lookups == 0:
            return 0.0
        return self._hits / lookups

    def __repr__(self) -> str:
        return (f"LRUCache(capacity={self._capacity}, "
                f"items={list(self._cache.items())})")


# ---- Usage ----
lru = LRUCache(capacity=3)

for k, v in (("a", 1), ("b", 2), ("c", 3)):
    lru.put(k, v)
print(lru)  # a, b, c

lru.get("a")  # touching 'a' promotes it to most-recently-used
print(lru)    # b, c, a

victim = lru.put("d", 4)  # over capacity -> LRU entry 'b' is dropped
print(f"Evicted: {victim}")  # ('b', 2)
print(lru)    # c, a, d

print(f"Get 'b': {lru.get('b')}")  # None (evicted)
print(f"Hit rate: {lru.hit_rate():.1%}")

Problem 2: LFU Cache

Implement a Least Frequently Used cache. When evicting, remove the key with the lowest access frequency. On ties, remove the least recently used among the least frequent.

from collections import defaultdict, OrderedDict


class LFUCache:
    """Least Frequently Used cache with O(1) get and put.

    Data structures:
    - _key_to_val: key -> value
    - _key_to_freq: key -> access frequency
    - _freq_to_keys: frequency -> OrderedDict of keys (insertion order
      provides LRU tie-breaking within a frequency bucket)
    - _min_freq: tracks the current minimum frequency

    The trick: when evicting, use _min_freq to find the least frequent
    bucket, then pop the first item (LRU) from that bucket's OrderedDict.
    """

    def __init__(self, capacity: int):
        """
        Args:
            capacity: Maximum number of entries; must be positive.

        Raises:
            ValueError: If capacity is not positive.
        """
        if capacity <= 0:
            raise ValueError("Capacity must be positive")
        self._capacity = capacity
        # Fix: the original annotated these with typing.Dict, which this
        # file never imports. Annotations on attribute targets ARE
        # evaluated at runtime (PEP 526), so instantiation raised
        # NameError. Builtin generics (PEP 585) need no import.
        self._key_to_val: dict[str, Any] = {}
        self._key_to_freq: dict[str, int] = {}
        self._freq_to_keys: dict[int, OrderedDict] = defaultdict(OrderedDict)
        self._min_freq = 0

    def _update_freq(self, key: str):
        """Move key from its current frequency bucket to the next one up."""
        freq = self._key_to_freq[key]
        new_freq = freq + 1

        # Remove from old frequency bucket
        del self._freq_to_keys[freq][key]
        if not self._freq_to_keys[freq]:
            del self._freq_to_keys[freq]
            # The emptied bucket was the minimum: the minimum moves up by
            # exactly one, since this key is the only one that changed.
            if self._min_freq == freq:
                self._min_freq = new_freq

        # Add to new frequency bucket. The OrderedDict is used purely as
        # an ordered set, so the stored value is always None.
        self._key_to_freq[key] = new_freq
        self._freq_to_keys[new_freq][key] = None

    def get(self, key: str) -> Optional[Any]:
        """Get value by key, bumping its frequency. Returns None on miss."""
        if key not in self._key_to_val:
            return None

        self._update_freq(key)
        return self._key_to_val[key]

    def put(self, key: str, value: Any) -> Optional[str]:
        """Insert or update. Returns evicted key if eviction occurred."""
        if key in self._key_to_val:
            # Overwrite counts as an access: bump frequency, no eviction.
            self._key_to_val[key] = value
            self._update_freq(key)
            return None

        # Evict if at capacity
        evicted_key = None
        if len(self._key_to_val) >= self._capacity:
            # Remove the LRU key from the minimum-frequency bucket
            evicted_key, _ = self._freq_to_keys[self._min_freq].popitem(last=False)
            if not self._freq_to_keys[self._min_freq]:
                del self._freq_to_keys[self._min_freq]
            del self._key_to_val[evicted_key]
            del self._key_to_freq[evicted_key]

        # A new key always starts at frequency 1, which is by definition
        # the new minimum.
        self._key_to_val[key] = value
        self._key_to_freq[key] = 1
        self._freq_to_keys[1][key] = None
        self._min_freq = 1

        return evicted_key

    def size(self) -> int:
        """Number of entries currently cached."""
        return len(self._key_to_val)


# ---- Usage ----
lfu = LFUCache(capacity=3)
for k, v in (("a", 1), ("b", 2), ("c", 3)):
    lfu.put(k, v)

# Build up frequencies: a=3, b=2, c=1
lfu.get("a")
lfu.get("a")
lfu.get("b")

dropped = lfu.put("d", 4)  # 'c' has the lowest frequency -> evicted
print(f"Evicted: {dropped}")  # 'c'

print(f"Get 'c': {lfu.get('c')}")  # None (evicted)
print(f"Get 'a': {lfu.get('a')}")  # 1 (still cached, high frequency)

Problem 3: TTL Cache

Implement a cache where entries expire after a configurable time-to-live. Expired entries are evicted lazily on access and eagerly via an explicit cleanup() pass, which this implementation invokes when the cache reaches capacity.

import time
import heapq
from dataclasses import dataclass, field


class TTLCache:
    """Cache with Time-To-Live expiration.

    Each entry has an expiration timestamp. Expired entries are
    cleaned up both lazily (on get) and eagerly (via cleanup).

    Data structures:
    - _store: key -> (value, expire_time)
    - _expiry_heap: min-heap of (expire_time, key) for efficient cleanup

    Overwriting a key pushes a new heap entry without removing the old
    one, so the heap may contain stale (expire_time, key) pairs; every
    consumer must verify a popped entry against _store before acting.
    """

    def __init__(self, default_ttl: int = 300, max_size: int = 1000):
        """
        Args:
            default_ttl: Default TTL in seconds
            max_size: Maximum number of entries (evict oldest on overflow)
        """
        self._default_ttl = default_ttl
        self._max_size = max_size
        # Fix: originally annotated with typing.Dict, which this file
        # never imports — attribute-target annotations are evaluated at
        # runtime (PEP 526), so construction raised NameError.
        self._store: dict[str, tuple] = {}  # key -> (value, expire_time)
        self._expiry_heap: list = []  # min-heap of (expire_time, key)

    def get(self, key: str) -> Optional[Any]:
        """Get value by key. Returns None if missing or expired."""
        if key not in self._store:
            return None

        value, expire_time = self._store[key]

        # Lazy expiration: drop the entry the moment it is seen expired.
        if time.time() > expire_time:
            del self._store[key]
            return None

        return value

    def put(self, key: str, value: Any, ttl: Optional[int] = None):
        """Set key with value and optional custom TTL.

        Args:
            key: Cache key.
            value: Value to store.
            ttl: Per-entry TTL in seconds; defaults to default_ttl.
        """
        ttl = ttl if ttl is not None else self._default_ttl
        expire_time = time.time() + ttl

        # Only inserts (not overwrites) can grow the cache, so capacity
        # pressure is handled for new keys only.
        if len(self._store) >= self._max_size and key not in self._store:
            self.cleanup()
            # Still full after dropping expired entries: sacrifice the
            # live entry that would expire soonest.
            if len(self._store) >= self._max_size:
                self._evict_next()

        self._store[key] = (value, expire_time)
        heapq.heappush(self._expiry_heap, (expire_time, key))

    def delete(self, key: str) -> bool:
        """Remove a key. Returns True if it existed and was not expired."""
        if key in self._store:
            _, expire_time = self._store[key]
            del self._store[key]
            # An expired-but-not-yet-reaped key reports False, matching
            # what a caller would have observed via get().
            return time.time() <= expire_time
        return False

    def ttl_remaining(self, key: str) -> Optional[float]:
        """Return remaining TTL in seconds.

        Returns None if the key does not exist OR has already expired
        (the original docstring only mentioned the missing-key case).
        """
        if key not in self._store:
            return None
        _, expire_time = self._store[key]
        remaining = expire_time - time.time()
        # remaining is known positive here, so no clamping is needed
        # (the original max(0.0, remaining) was dead code).
        return remaining if remaining > 0 else None

    def cleanup(self) -> int:
        """Remove all expired entries. Returns count of removed entries."""
        now = time.time()
        removed = 0

        while self._expiry_heap:
            expire_time, key = self._expiry_heap[0]
            if expire_time > now:
                break  # heap root is live; everything below it is too

            heapq.heappop(self._expiry_heap)

            # Only delete if the entry still exists and is truly expired
            # (it may have been overwritten with a later expiration).
            if key in self._store:
                stored_value, stored_expire = self._store[key]
                if stored_expire <= now:
                    del self._store[key]
                    removed += 1

        return removed

    def _evict_next(self):
        """Evict the live entry closest to expiration.

        Bug fix: skip stale heap entries whose expiration no longer
        matches the store. The original deleted any key it popped, so a
        key refreshed via put() with a longer TTL could be evicted from
        its old heap entry while sooner-expiring keys survived.
        """
        while self._expiry_heap:
            expire_time, key = heapq.heappop(self._expiry_heap)
            entry = self._store.get(key)
            if entry is not None and entry[1] == expire_time:
                del self._store[key]
                return

    def size(self) -> int:
        """Number of entries currently stored (may include expired ones)."""
        return len(self._store)


# ---- Usage ----
ttl_cache = TTLCache(default_ttl=5, max_size=100)

ttl_cache.put("session_1", {"user": "alice"}, ttl=2)
ttl_cache.put("session_2", {"user": "bob"}, ttl=10)

print(f"session_1: {ttl_cache.get('session_1')}")  # {'user': 'alice'}
print(f"TTL remaining: {ttl_cache.ttl_remaining('session_1'):.1f}s")

# Once the 2-second TTL elapses, the entry vanishes on the next access:
# time.sleep(3)
# print(f"session_1 after TTL: {ttl_cache.get('session_1')}")  # None

Problem 4: Write-Through Cache

Implement a cache that writes to both the cache and a backing store on every write. Reads check the cache first, then fall through to the backing store on miss.

from typing import Callable


class WriteThroughCache:
    """Write-through cache backed by an external data store.

    Writes go to the backing store and the cache synchronously, so the
    two can never disagree. Reads are read-through: a cache miss falls
    back to the store and populates the cache for next time.

    Trade-offs:
    - Pro: cache and store stay consistent
    - Pro: read misses are populated automatically
    - Con: every write pays the store's write latency
    - Con: rarely-read data is written to the cache needlessly

    Alternatives:
    - Write-back: write to cache only, flush to store later (faster writes)
    - Write-around: write to store only, populate cache on read miss
    """

    def __init__(self, capacity: int, store_read: Callable,
                 store_write: Callable, store_delete: Callable):
        """
        Args:
            capacity: Max cache size (LRU eviction)
            store_read: fn(key) -> value or None
            store_write: fn(key, value) -> None
            store_delete: fn(key) -> None
        """
        self._cache = LRUCache(capacity)
        self._store_read = store_read
        self._store_write = store_write
        self._store_delete = store_delete

    def get(self, key: str) -> Optional[Any]:
        """Read-through lookup: cache first, backing store on a miss."""
        cached = self._cache.get(key)
        if cached is not None:
            return cached

        fetched = self._store_read(key)
        if fetched is None:
            return None

        self._cache.put(key, fetched)  # warm the cache for future reads
        return fetched

    def put(self, key: str, value: Any):
        """Write-through: write to both cache and store."""
        # The store is the source of truth, so it is written first.
        self._store_write(key, value)
        self._cache.put(key, value)

    def delete(self, key: str):
        """Delete from both cache and store."""
        self._store_delete(key)
        self._cache.delete(key)

    def invalidate(self, key: str):
        """Drop the cached copy only; the next read refetches from store."""
        self._cache.delete(key)

    def cache_hit_rate(self) -> float:
        """Hit rate of the underlying LRU cache."""
        return self._cache.hit_rate()


# ---- Usage with simulated database ----
class FakeDatabase:
    """In-memory stand-in for a backing store, with call counters."""

    def __init__(self):
        self._data = {}
        self.read_count = 0   # number of read() calls observed
        self.write_count = 0  # number of write() calls observed

    def read(self, key):
        """Return the value for key (None if absent); counts the call."""
        self.read_count += 1
        try:
            return self._data[key]
        except KeyError:
            return None

    def write(self, key, value):
        """Store value under key; counts the call."""
        self.write_count += 1
        self._data[key] = value

    def delete(self, key):
        """Remove key if present; deleting a missing key is a no-op."""
        self._data.pop(key, None)


db = FakeDatabase()
wt_cache = WriteThroughCache(
    capacity=3,
    store_read=db.read,
    store_write=db.write,
    store_delete=db.delete,
)

# Every put lands in both the cache and the database
wt_cache.put("user:1", {"name": "Alice"})
wt_cache.put("user:2", {"name": "Bob"})

# Served straight from the cache, so the DB sees no read
print(f"user:1 = {wt_cache.get('user:1')}")  # From cache
print(f"DB reads: {db.read_count}")       # 0

# Invalidation forces the next read through to the DB
wt_cache.invalidate("user:1")
print(f"user:1 = {wt_cache.get('user:1')}")  # From DB, then cached
print(f"DB reads: {db.read_count}")       # 1

print(f"Cache hit rate: {wt_cache.cache_hit_rate():.1%}")
💡
Interview tip: For LRU cache, use Python's OrderedDict for a clean O(1) solution. If the interviewer says "no OrderedDict," implement a HashMap + DoublyLinkedList manually. Always mention the time complexity of each operation without being asked — this shows you understand why the data structure choice matters.

Key Takeaways

💡
  • LRU cache is the most commonly asked: use OrderedDict for O(1) get/put
  • LFU cache needs three maps: key-to-value, key-to-frequency, frequency-to-keys
  • TTL cache uses lazy expiration on access + eager cleanup via a min-heap
  • Write-through ensures cache-store consistency but adds write latency
  • Always handle edge cases: capacity 0, duplicate keys, eviction on insert
  • Know the trade-offs between write-through, write-back, and write-around strategies