Implement Caches
"Design an LRU cache" is the single most asked system implementation question in coding interviews. This lesson covers four cache implementations: LRU with O(1) operations, LFU for frequency-based eviction, TTL for time-based expiration, and write-through for consistency with a backing store.
Problem 1: LRU Cache
Implement a Least Recently Used cache where both get and put operations run in O(1) time. This is LeetCode #146 and appears in interviews at Google, Meta, Amazon, and Microsoft.
from collections import OrderedDict
from typing import Any, Optional
class LRUCache:
    """Least Recently Used cache with O(1) get and put.

    Backed by an OrderedDict, which combines a hash map with a doubly
    linked list:
    - get(): O(1) lookup + move_to_end
    - put(): O(1) insert/update + evict if over capacity

    The key insight: OrderedDict.move_to_end() is O(1) and
    OrderedDict.popitem(last=False) removes the least recent item.
    """

    def __init__(self, capacity: int):
        if capacity <= 0:
            raise ValueError("Capacity must be positive")
        self._capacity = capacity
        self._cache: OrderedDict = OrderedDict()
        self._hits = 0
        self._misses = 0

    def get(self, key: str) -> Optional[Any]:
        """Get value by key. Returns None on miss."""
        try:
            # Touch: a hit makes the key most recently used.
            self._cache.move_to_end(key)
        except KeyError:
            self._misses += 1
            return None
        self._hits += 1
        return self._cache[key]

    def put(self, key: str, value: Any) -> Optional[tuple]:
        """Insert or update key-value pair.

        Returns the evicted (key, value) tuple if eviction occurred,
        otherwise None.
        """
        if key in self._cache:
            # Update in place; the write also counts as a "use".
            self._cache.move_to_end(key)
            self._cache[key] = value
            return None
        evicted = None
        if len(self._cache) >= self._capacity:
            # Full: drop the least recently used entry (front of the dict).
            evicted = self._cache.popitem(last=False)
        self._cache[key] = value
        return evicted

    def delete(self, key: str) -> bool:
        """Remove a key from the cache. Returns True if key existed."""
        try:
            del self._cache[key]
        except KeyError:
            return False
        return True

    def size(self) -> int:
        """Number of entries currently cached."""
        return len(self._cache)

    def hit_rate(self) -> float:
        """Fraction of get() calls that were hits (0.0 if none yet)."""
        total = self._hits + self._misses
        if not total:
            return 0.0
        return self._hits / total

    def __repr__(self) -> str:
        snapshot = list(self._cache.items())
        return f"LRUCache(capacity={self._capacity}, items={snapshot})"
# ---- Usage ----
lru = LRUCache(capacity=3)
for k, v in (("a", 1), ("b", 2), ("c", 3)):
    lru.put(k, v)
print(lru)  # a, b, c
lru.get("a")  # touching 'a' makes it most recently used
print(lru)  # b, c, a
evicted = lru.put("d", 4)  # 'b' is now least recent -> evicted
print(f"Evicted: {evicted}")  # ('b', 2)
print(lru)  # c, a, d
print(f"Get 'b': {lru.get('b')}")  # None (evicted)
print(f"Hit rate: {lru.hit_rate():.1%}")
Problem 2: LFU Cache
Implement a Least Frequently Used cache. When evicting, remove the key with the lowest access frequency. On ties, remove the least recently used among the least frequent.
from collections import defaultdict, OrderedDict
class LFUCache:
    """Least Frequently Used cache with O(1) get and put.

    Data structures:
    - _key_to_val: key -> value
    - _key_to_freq: key -> access frequency
    - _freq_to_keys: frequency -> OrderedDict of keys; insertion order
      within a bucket gives LRU tie-breaking among equally-frequent keys
    - _min_freq: tracks the current minimum frequency

    The trick: when evicting, use _min_freq to find the least-frequent
    bucket, then pop the first item (LRU) from that bucket's OrderedDict.
    """

    def __init__(self, capacity: int):
        """Create an empty cache.

        Args:
            capacity: Maximum number of entries; must be positive.

        Raises:
            ValueError: If capacity is zero or negative.
        """
        if capacity <= 0:
            raise ValueError("Capacity must be positive")
        self._capacity = capacity
        # Fixed: the original annotations referenced `Dict`, which is
        # never imported in this file; built-in generics need no import.
        self._key_to_val: dict[str, Any] = {}
        self._key_to_freq: dict[str, int] = {}
        self._freq_to_keys: dict[int, OrderedDict] = defaultdict(OrderedDict)
        self._min_freq = 0

    def _update_freq(self, key: str):
        """Increment frequency of key and update frequency buckets."""
        freq = self._key_to_freq[key]
        new_freq = freq + 1
        # Remove from old frequency bucket; drop the bucket if it emptied.
        del self._freq_to_keys[freq][key]
        if not self._freq_to_keys[freq]:
            del self._freq_to_keys[freq]
            # Only bump min_freq when the *emptied* bucket was the minimum;
            # otherwise other keys still live at the old minimum frequency.
            if self._min_freq == freq:
                self._min_freq = new_freq
        # Add to new frequency bucket (most recent within that bucket).
        self._key_to_freq[key] = new_freq
        self._freq_to_keys[new_freq][key] = None

    def get(self, key: str) -> Optional[Any]:
        """Return the value for key (bumping its frequency), or None."""
        if key not in self._key_to_val:
            return None
        self._update_freq(key)
        return self._key_to_val[key]

    def put(self, key: str, value: Any) -> Optional[str]:
        """Insert or update. Returns evicted key if eviction occurred."""
        if key in self._key_to_val:
            # Updating an existing key counts as an access.
            self._key_to_val[key] = value
            self._update_freq(key)
            return None
        # Evict if at capacity
        evicted_key = None
        if len(self._key_to_val) >= self._capacity:
            # Remove the LRU key from the minimum-frequency bucket.
            evicted_key, _ = self._freq_to_keys[self._min_freq].popitem(last=False)
            if not self._freq_to_keys[self._min_freq]:
                del self._freq_to_keys[self._min_freq]
            del self._key_to_val[evicted_key]
            del self._key_to_freq[evicted_key]
        # Insert the new key with frequency 1, which is now the minimum.
        self._key_to_val[key] = value
        self._key_to_freq[key] = 1
        self._freq_to_keys[1][key] = None
        self._min_freq = 1
        return evicted_key

    def size(self) -> int:
        """Number of entries currently cached."""
        return len(self._key_to_val)
# ---- Usage ----
lfu = LFUCache(capacity=3)
lfu.put("a", 1)
lfu.put("b", 2)
lfu.put("c", 3)
for _ in range(2):
    lfu.get("a")  # freq: a -> 3
lfu.get("b")  # freq: b -> 2; c stays at 1
evicted = lfu.put("d", 4)  # 'c' has the lowest frequency -> evicted
print(f"Evicted: {evicted}")  # 'c'
print(f"Get 'c': {lfu.get('c')}")  # None (evicted)
print(f"Get 'a': {lfu.get('a')}")  # 1 (still cached, high frequency)
Problem 3: TTL Cache
Implement a cache where entries expire after a configurable time-to-live. Expired entries are evicted lazily on access and eagerly via periodic cleanup.
import time
import heapq
from dataclasses import dataclass, field
class TTLCache:
    """Cache with Time-To-Live expiration.

    Each entry has an absolute expiration timestamp. Expired entries are
    cleaned up both lazily (on get) and eagerly (via cleanup).

    Data structures:
    - _store: key -> (value, expire_time)
    - _expiry_heap: min-heap of (expire_time, key) for efficient cleanup.
      Heap entries are never removed on update/delete, so stale entries
      can exist; consumers must validate them against _store.
    """

    def __init__(self, default_ttl: int = 300, max_size: int = 1000):
        """
        Args:
            default_ttl: Default TTL in seconds
            max_size: Maximum number of entries (evict soonest-to-expire
                on overflow)
        """
        self._default_ttl = default_ttl
        self._max_size = max_size
        self._store: dict[str, tuple] = {}  # key -> (value, expire_time)
        self._expiry_heap: list = []  # min-heap of (expire_time, key)

    def get(self, key: str) -> Optional[Any]:
        """Get value by key. Returns None if missing or expired."""
        if key not in self._store:
            return None
        value, expire_time = self._store[key]
        # Lazy expiration: drop the entry on first access past its TTL.
        # The stale heap entry is left behind; cleanup() tolerates it.
        if time.time() > expire_time:
            del self._store[key]
            return None
        return value

    def put(self, key: str, value: Any, ttl: Optional[int] = None):
        """Set key with value and optional custom TTL (seconds)."""
        ttl = ttl if ttl is not None else self._default_ttl
        expire_time = time.time() + ttl
        # Only new keys can push us over capacity; updates reuse a slot.
        if len(self._store) >= self._max_size and key not in self._store:
            self.cleanup()
            # If still at capacity after cleanup, evict soonest-to-expire.
            if len(self._store) >= self._max_size:
                self._evict_next()
        self._store[key] = (value, expire_time)
        heapq.heappush(self._expiry_heap, (expire_time, key))

    def delete(self, key: str) -> bool:
        """Remove a key. Returns True if it existed and was not expired."""
        if key in self._store:
            _, expire_time = self._store[key]
            del self._store[key]
            return time.time() <= expire_time
        return False

    def ttl_remaining(self, key: str) -> Optional[float]:
        """Return remaining TTL in seconds, or None if the key is absent
        or already expired."""
        if key not in self._store:
            return None
        _, expire_time = self._store[key]
        remaining = expire_time - time.time()
        # Simplified: the original wrapped this in max(0.0, ...), which
        # could never trigger behind the `remaining > 0` guard.
        return remaining if remaining > 0 else None

    def cleanup(self) -> int:
        """Remove all expired entries. Returns count of removed entries."""
        now = time.time()
        removed = 0
        while self._expiry_heap:
            expire_time, key = self._expiry_heap[0]
            if expire_time > now:
                break
            heapq.heappop(self._expiry_heap)
            # Only delete if the entry still exists and is truly expired
            # (a re-put may have extended its TTL since this was pushed).
            if key in self._store:
                _, stored_expire = self._store[key]
                if stored_expire <= now:
                    del self._store[key]
                    removed += 1
        return removed

    def _evict_next(self):
        """Evict the live entry closest to expiration.

        Bug fix: heap entries go stale when a key is re-put with a new
        TTL. Only delete when the popped entry's expire_time matches the
        one currently stored; otherwise a freshly-updated key could be
        evicted prematurely through its old heap entry.
        """
        while self._expiry_heap:
            expire_time, key = heapq.heappop(self._expiry_heap)
            entry = self._store.get(key)
            if entry is not None and entry[1] == expire_time:
                del self._store[key]
                return

    def size(self) -> int:
        """Number of entries currently stored (may include expired ones
        not yet cleaned up)."""
        return len(self._store)
# ---- Usage ----
sessions = TTLCache(default_ttl=5, max_size=100)
sessions.put("session_1", {"user": "alice"}, ttl=2)
sessions.put("session_2", {"user": "bob"}, ttl=10)
print(f"session_1: {sessions.get('session_1')}")  # {'user': 'alice'}
print(f"TTL remaining: {sessions.ttl_remaining('session_1'):.1f}s")
# After TTL expires, entry is gone (simulated with short TTL)
# time.sleep(3)
# print(f"session_1 after TTL: {sessions.get('session_1')}")  # None
Problem 4: Write-Through Cache
Implement a cache that writes to both the cache and a backing store on every write. Reads check the cache first, then fall through to the backing store on miss.
from typing import Callable
class WriteThroughCache:
    """Write-through cache in front of a backing data store.

    On write: the cache AND the backing store are updated synchronously,
    so the two never disagree.
    On read: the cache is consulted first; a miss falls back to the
    backing store and populates the cache (read-through).

    Trade-offs:
    - Pro: Cache and store are always consistent
    - Pro: Read misses are automatically populated
    - Con: Writes are slower (must wait for store write)
    - Con: Unnecessary writes for rarely-read data

    Alternatives:
    - Write-back: write to cache only, flush to store later (faster writes)
    - Write-around: write to store only, populate cache on read miss
    """

    def __init__(self, capacity: int, store_read: Callable,
                 store_write: Callable, store_delete: Callable):
        """
        Args:
            capacity: Max cache size (LRU eviction)
            store_read: fn(key) -> value or None
            store_write: fn(key, value) -> None
            store_delete: fn(key) -> None
        """
        self._cache = LRUCache(capacity)
        self._store_read = store_read
        self._store_write = store_write
        self._store_delete = store_delete

    def get(self, key: str) -> Optional[Any]:
        """Read-through: serve from cache when possible, else the store."""
        hit = self._cache.get(key)
        if hit is not None:
            return hit
        # Miss: consult the backing store and cache whatever it returns
        # (if anything) so future reads are served locally.
        fetched = self._store_read(key)
        if fetched is not None:
            self._cache.put(key, fetched)
        return fetched

    def put(self, key: str, value: Any):
        """Write-through: store first (source of truth), then the cache."""
        self._store_write(key, value)
        self._cache.put(key, value)

    def delete(self, key: str):
        """Remove the key from both the store and the cache."""
        self._store_delete(key)
        self._cache.delete(key)

    def invalidate(self, key: str):
        """Drop the cached copy only; the next get() refetches it."""
        self._cache.delete(key)

    def cache_hit_rate(self) -> float:
        """Hit rate of the underlying LRU cache."""
        return self._cache.hit_rate()
# ---- Usage with simulated database ----
class FakeDatabase:
    """In-memory stand-in for a real backing store.

    Tracks read/write counts so callers can observe how often a cache
    actually falls through to the store.
    """

    def __init__(self):
        self._data = {}
        self.read_count = 0   # bumped on every read()
        self.write_count = 0  # bumped on every write()

    def read(self, key):
        """Return the stored value, or None when the key is absent."""
        self.read_count += 1
        return self._data.get(key)

    def write(self, key, value):
        """Store value under key, overwriting any previous value."""
        self.write_count += 1
        self._data[key] = value

    def delete(self, key):
        """Remove key if present; missing keys are ignored."""
        self._data.pop(key, None)
db = FakeDatabase()
wt = WriteThroughCache(
    capacity=3,
    store_read=db.read,
    store_write=db.write,
    store_delete=db.delete,
)
# Write-through: every put lands in both the cache and the DB
wt.put("user:1", {"name": "Alice"})
wt.put("user:2", {"name": "Bob"})
# Served straight from the cache -> no DB read yet
print(f"user:1 = {wt.get('user:1')}")  # From cache
print(f"DB reads: {db.read_count}")  # 0
# Invalidate, then re-read: the miss falls through to the DB
wt.invalidate("user:1")
print(f"user:1 = {wt.get('user:1')}")  # From DB, then cached
print(f"DB reads: {db.read_count}")  # 1
print(f"Cache hit rate: {wt.cache_hit_rate():.1%}")
Interview tip: use OrderedDict for a clean O(1) solution. If the interviewer says "no OrderedDict," implement a HashMap + DoublyLinkedList manually. Always mention the time complexity of each operation without being asked — this shows you understand why the data structure choice matters.
Key Takeaways
- LRU cache is the most commonly asked: use OrderedDict for O(1) get/put
- LFU cache needs three maps: key-to-value, key-to-frequency, frequency-to-keys
- TTL cache uses lazy expiration on access + eager cleanup via a min-heap
- Write-through ensures cache-store consistency but adds write latency
- Always handle edge cases: capacity 0, duplicate keys, eviction on insert
- Know the trade-offs between write-through, write-back, and write-around strategies
Lilly Tech Systems