Monte Carlo Simulation
Five problems that apply random sampling to questions that are analytically intractable or computationally expensive. Monte Carlo methods are the backbone of quantitative finance, Bayesian inference, and computational physics. Every quant firm tests these skills.
Problem 1: Estimate Pi via Monte Carlo
Problem: Estimate the value of pi by randomly sampling points in a unit square and counting how many fall inside the inscribed quarter circle. This is the canonical Monte Carlo problem — every interview on simulation starts here.
import random
import math
# ---- CHALLENGE ----
def estimate_pi(num_samples):
    """Estimate pi using Monte Carlo sampling."""
    # Challenge stub: implement here; a reference solution follows below.
    pass
# ---- SOLUTION ----
def estimate_pi(num_samples):
    """Estimate pi by uniform sampling over the unit square.

    Points (x, y) are drawn uniformly from [0, 1] x [0, 1]; the fraction
    that lands inside the quarter disc x^2 + y^2 <= 1 approximates pi/4,
    so the estimate is 4 * hits / num_samples.

    Convergence: the standard error shrinks as O(1/sqrt(n)) by the
    Central Limit Theorem, so ~10,000 samples give roughly two correct
    decimal places.
    """
    hits = 0
    for _ in range(num_samples):
        px = random.random()
        py = random.random()
        # Booleans add as 0/1, so this counts points inside the arc.
        hits += px * px + py * py <= 1.0
    return 4.0 * hits / num_samples
# ---- VERIFICATION ----
random.seed(42)  # fixed seed so the run below is reproducible
pi_est = estimate_pi(100000)
# Standard error at 100k samples is ~0.005, so 0.05 is a generous bound.
assert abs(pi_est - math.pi) < 0.05, f"pi estimate {pi_est} too far from {math.pi}"
print(f"Pi estimate: {pi_est:.4f} (true: {math.pi:.4f})")
print("Test passed!")
Problem 2: Monte Carlo Option Pricing
Problem: Price a European call option using Monte Carlo simulation. Simulate stock price paths under geometric Brownian motion and compute the discounted expected payoff. This is the most important Monte Carlo application in quantitative finance.
import math
import random
# ---- CHALLENGE ----
def monte_carlo_call_price(S0, K, r, sigma, T, num_simulations):
    """Price a European call option using Monte Carlo.
    S0: initial stock price, K: strike price, r: risk-free rate,
    sigma: volatility, T: time to expiration (years).
    """
    # Challenge stub: the reference implementation is in the SOLUTION section.
    pass
# ---- SOLUTION ----
def _box_muller_single():
    """Draw one N(0, 1) sample via the Box-Muller transform."""
    u1 = random.random()
    u2 = random.random()
    # log(0) is undefined, so redraw until u1 is strictly positive.
    while u1 == 0:
        u1 = random.random()
    radius = math.sqrt(-2.0 * math.log(u1))
    angle = 2.0 * math.pi * u2
    return radius * math.cos(angle)


def monte_carlo_call_price(S0, K, r, sigma, T, num_simulations):
    """Price a European call by simulating terminal prices under GBM.

    Risk-neutral dynamics give
        S_T = S0 * exp((r - sigma^2/2) * T + sigma * sqrt(T) * Z),  Z ~ N(0,1),
    and the option price is the discounted mean payoff
        exp(-r*T) * E[max(S_T - K, 0)].

    S0: spot price, K: strike, r: risk-free rate,
    sigma: volatility, T: years to expiry.
    """
    mu = (r - 0.5 * sigma * sigma) * T
    scale = sigma * math.sqrt(T)
    total_payoff = 0.0
    for _ in range(num_simulations):
        terminal = S0 * math.exp(mu + scale * _box_muller_single())
        # Equivalent to max(terminal - K, 0): only in-the-money paths pay.
        if terminal > K:
            total_payoff += terminal - K
    return math.exp(-r * T) * total_payoff / num_simulations
def black_scholes_call(S0, K, r, sigma, T):
    """Closed-form Black-Scholes price of a European call (for verification).

    C  = S0 * N(d1) - K * exp(-r*T) * N(d2)
    d1 = (ln(S0/K) + (r + sigma^2/2) * T) / (sigma * sqrt(T))
    d2 = d1 - sigma * sqrt(T)

    Improvement: the normal CDF now uses the standard library's math.erf
    (exact to double precision) instead of the hand-rolled
    Abramowitz-Stegun polynomial approximation (~1.5e-7 absolute error).
    """
    def _ncdf(x):
        # Phi(x) = (1 + erf(x / sqrt(2))) / 2
        return 0.5 * (1.0 + math.erf(x / math.sqrt(2.0)))

    sqrt_t = math.sqrt(T)
    d1 = (math.log(S0 / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * sqrt_t)
    d2 = d1 - sigma * sqrt_t
    return S0 * _ncdf(d1) - K * math.exp(-r * T) * _ncdf(d2)
# ---- VERIFICATION ----
random.seed(42)  # reproducible Monte Carlo run
# Standard option parameters (textbook example; Black-Scholes value ~ $8.02)
S0 = 100 # stock price
K = 105 # strike price
r = 0.05 # 5% risk-free rate
sigma = 0.2 # 20% volatility
T = 1.0 # 1 year
mc_price = monte_carlo_call_price(S0, K, r, sigma, T, num_simulations=100000)
bs_price = black_scholes_call(S0, K, r, sigma, T)
print(f"Monte Carlo price: ${mc_price:.2f}")
print(f"Black-Scholes price: ${bs_price:.2f}")
# MC standard error at 100k paths is ~$0.04, so $0.50 is a loose bound.
assert abs(mc_price - bs_price) < 0.50, f"MC {mc_price:.2f} too far from BS {bs_price:.2f}"
print("Test passed!")
Problem 3: Probability Puzzles via Simulation
Problem: Solve classic probability puzzles using Monte Carlo simulation: (1) the Birthday Problem, (2) the Monty Hall Problem, and (3) the Coupon Collector Problem. These puzzles appear frequently in quant interviews because they test whether candidates can translate probability questions into simulations.
import random
import math
# ---- CHALLENGE ----
def birthday_problem_sim(num_people, num_simulations=10000):
    """Estimate P(at least 2 people share a birthday) for num_people people."""
    # Challenge stub: a reference solution follows in the SOLUTION section.
    pass

def monty_hall_sim(num_simulations=10000):
    """Simulate the Monty Hall problem. Return P(win|switch) and P(win|stay)."""
    # Challenge stub: a reference solution follows in the SOLUTION section.
    pass

def coupon_collector_sim(num_coupons, num_simulations=10000):
    """Estimate expected number of draws to collect all num_coupons distinct coupons."""
    # Challenge stub: a reference solution follows in the SOLUTION section.
    pass
# ---- SOLUTION ----
def birthday_problem_sim(num_people, num_simulations=10000):
    """Birthday Problem: chance that at least two of n people share a birthday.

    Closed form: P = 1 - prod_{i=0}^{n-1} (365 - i) / 365.
    The simulation draws birthdays one at a time and stops a trial as soon
    as a duplicate appears.

    Returns {"simulated": ..., "analytical": ...}.
    """
    def _trial_has_collision():
        # One trial: draw until a repeat day or everyone has a birthday.
        seen = set()
        for _ in range(num_people):
            day = random.randint(1, 365)
            if day in seen:
                return True
            seen.add(day)
        return False

    hits = sum(_trial_has_collision() for _ in range(num_simulations))
    # Probability that ALL birthdays are distinct, then complement it.
    all_distinct = math.prod((365 - i) / 365 for i in range(num_people))
    return {"simulated": hits / num_simulations, "analytical": 1 - all_distinct}
def monty_hall_sim(num_simulations=10000):
    """Monty Hall: estimate win probabilities for switching vs. staying.

    Per round: the car sits behind one of three doors, the player picks a
    door, and the host opens a goat door the player did not pick (choosing
    at random between the two goats when the player picked the car).
    Staying wins iff the first pick was the car; switching wins in exactly
    the complementary rounds, so switching wins ~2/3 of the time.

    Returns {"p_win_switch": ..., "p_win_stay": ...}.
    """
    wins_if_switch = 0
    wins_if_stay = 0
    for _ in range(num_simulations):
        car_door = random.randint(0, 2)
        first_pick = random.randint(0, 2)
        goat_doors = [d for d in (0, 1, 2) if d != first_pick and d != car_door]
        opened = random.choice(goat_doors)
        # Door labels sum to 3, so the remaining closed door is:
        other_door = 3 - first_pick - opened
        if other_door == car_door:
            wins_if_switch += 1
        else:
            # Exactly one of the two strategies wins each round.
            wins_if_stay += 1
    return {
        "p_win_switch": wins_if_switch / num_simulations,
        "p_win_stay": wins_if_stay / num_simulations
    }
def coupon_collector_sim(num_coupons, num_simulations=10000):
    """Coupon Collector: average draws needed to see every coupon type once.

    Theory: E[T] = n * H(n), where H(n) = 1 + 1/2 + ... + 1/n is the n-th
    harmonic number.

    Returns {"simulated": ..., "analytical": ...}.
    """
    draws_total = 0
    for _ in range(num_simulations):
        seen = set()
        count = 0
        while len(seen) < num_coupons:
            seen.add(random.randint(0, num_coupons - 1))
            count += 1
        draws_total += count
    harmonic = sum(1.0 / k for k in range(1, num_coupons + 1))
    return {
        "simulated": draws_total / num_simulations,
        "analytical": num_coupons * harmonic
    }
# ---- VERIFICATION ----
random.seed(42)  # fixed seed for reproducibility
# Birthday problem: 23 people -> ~50.7% probability
bp = birthday_problem_sim(23, num_simulations=10000)
assert abs(bp["simulated"] - bp["analytical"]) < 0.03
# Monty Hall: switching should win ~2/3
mh = monty_hall_sim(num_simulations=10000)
assert abs(mh["p_win_switch"] - 2/3) < 0.03
assert abs(mh["p_win_stay"] - 1/3) < 0.03
# Coupon collector: 10 types -> expected ~29.3 draws (10 * H(10))
cc = coupon_collector_sim(10, num_simulations=5000)
assert abs(cc["simulated"] - cc["analytical"]) < 2.0
print("All tests passed!")
Problem 4: Random Walk Analysis
Problem: Simulate a 1D random walk and compute key properties: expected displacement, expected squared displacement, probability of returning to origin, and first passage time. Also simulate the Gambler's Ruin problem.
import random
import math
# ---- CHALLENGE ----
def random_walk_1d(num_steps, num_simulations=10000):
    """Simulate 1D random walks and compute statistics."""
    # Challenge stub: a reference solution follows in the SOLUTION section.
    pass

def gamblers_ruin(initial_money, target_money, p_win=0.5, num_simulations=10000):
    """Simulate gambler's ruin problem."""
    # Challenge stub: a reference solution follows in the SOLUTION section.
    pass
# ---- SOLUTION ----
def random_walk_1d(num_steps, num_simulations=10000):
    """Simulate symmetric 1D random walks and report ensemble statistics.

    Each walk takes num_steps steps of +1 or -1 with equal probability.
    Theory: E[X_n] = 0, E[X_n^2] = n, so the RMS displacement is sqrt(n);
    the 1D walk is recurrent, so P(return to origin) -> 1 as n -> infinity.

    Returns a dict with the sample mean, mean squared displacement, RMS
    displacement, the theoretical RMS sqrt(num_steps), and the fraction
    of walks that revisited the origin at least once.
    """
    endpoints = []
    revisits = 0
    for _ in range(num_simulations):
        pos = 0
        came_back = False
        for tick in range(num_steps):
            pos += 1 if random.random() < 0.5 else -1
            if tick > 0 and pos == 0 and not came_back:
                came_back = True
        endpoints.append(pos)
        if came_back or pos == 0:
            revisits += 1
    count = len(endpoints)
    avg = sum(endpoints) / count
    msd = sum(e * e for e in endpoints) / count
    return {
        "mean_position": avg,
        "mean_squared_displacement": msd,
        "rms_displacement": math.sqrt(msd),
        "theoretical_rms": math.sqrt(num_steps),
        "return_probability": revisits / num_simulations
    }
def gamblers_ruin(initial_money, target_money, p_win=0.5, num_simulations=10000):
    """Gambler's Ruin: bet $1 per round until broke or at the target.

    Each round wins $1 with probability p_win, else loses $1; a game stops
    at bankroll 0 (ruin) or target_money (success).

    Theory:
      fair game (p = 1/2):  P(success) = initial / target
      biased game:          P(success) = (1 - r^initial) / (1 - r^target),
                            with r = (1 - p) / p

    Returns the simulated and analytical success probabilities plus the
    average number of rounds per game.
    """
    successes = 0
    rounds_sum = 0
    for _ in range(num_simulations):
        bankroll = initial_money
        played = 0
        while 0 < bankroll < target_money:
            bankroll += 1 if random.random() < p_win else -1
            played += 1
        if bankroll >= target_money:
            successes += 1
        rounds_sum += played
    # Analytical success probability; the fair-game formula is the limit
    # of the biased one, which is numerically unstable near p = 1/2.
    if abs(p_win - 0.5) < 1e-10:
        exact = initial_money / target_money
    else:
        ratio = (1 - p_win) / p_win
        exact = (1 - ratio ** initial_money) / (1 - ratio ** target_money)
    return {
        "p_win_simulated": successes / num_simulations,
        "p_win_analytical": exact,
        "avg_rounds": rounds_sum / num_simulations
    }
# ---- VERIFICATION ----
random.seed(42)  # fixed seed for reproducibility
# Random walk: RMS should approximate sqrt(n)
rw = random_walk_1d(100, num_simulations=10000)
assert abs(rw["mean_position"]) < 1.0, "Mean should be ~0"
assert abs(rw["rms_displacement"] - 10.0) < 2.0, "RMS should be ~sqrt(100)=10"
# Gambler's ruin: fair game, start=$20, target=$100
# Fair-game theory: P(win) = initial/target = 20/100 = 0.20
gr = gamblers_ruin(20, 100, p_win=0.5, num_simulations=5000)
assert abs(gr["p_win_simulated"] - 0.20) < 0.05, f"P(win) should be ~0.20"
print("All tests passed!")
Problem 5: Markov Chain Monte Carlo (Metropolis-Hastings)
Problem: Implement the Metropolis-Hastings algorithm to sample from an arbitrary target distribution. Use it to sample from a distribution that is difficult to sample from directly. This is the foundation of modern Bayesian computation.
import math
import random
# ---- CHALLENGE ----
def metropolis_hastings(target_log_pdf, initial, num_samples, proposal_std=1.0):
    """Sample from target distribution using Metropolis-Hastings."""
    # Challenge stub: a reference solution follows in the SOLUTION section.
    pass
# ---- SOLUTION ----
def _box_muller():
    """Draw one standard normal sample via the Box-Muller transform."""
    u1 = random.random()
    u2 = random.random()
    # log(0) is undefined; redraw until u1 is strictly positive.
    while u1 == 0:
        u1 = random.random()
    return math.sqrt(-2.0 * math.log(u1)) * math.cos(2.0 * math.pi * u2)


def metropolis_hastings(target_log_pdf, initial, num_samples, proposal_std=1.0, burn_in=1000):
    """Metropolis-Hastings MCMC sampler (symmetric Gaussian random-walk proposal).

    Algorithm:
      1. Start at `initial`.
      2. Each iteration: propose x' = x + Normal(0, proposal_std); accept
         with probability min(1, exp(log_pdf(x') - log_pdf(x))); otherwise
         stay at x.
      3. Discard the first `burn_in` states, then collect `num_samples`.

    Key concepts:
      - Works in LOG probabilities to avoid underflow.
      - The symmetric proposal reduces this to the plain Metropolis rule.
      - Acceptance rate ~23-44% is the usual target for good mixing (1D: ~44%).

    Fixes vs. the previous version:
      - The sample std hoists the mean out of the generator; the old code
        re-evaluated sum(samples)/len(samples) for EVERY element, an
        accidental O(n^2) pass over the chain.
      - The accept test no longer calls math.log(random.random()) directly:
        random.random() can return exactly 0.0, which would raise ValueError.

    Returns {"samples", "acceptance_rate", "mean", "std"}.
    """
    samples = []
    current = initial
    current_log_p = target_log_pdf(current)
    accepted = 0
    total = 0
    for i in range(num_samples + burn_in):
        # Propose new state (symmetric random walk proposal).
        proposal = current + proposal_std * _box_muller()
        proposal_log_p = target_log_pdf(proposal)
        log_alpha = proposal_log_p - current_log_p
        total += 1
        u = random.random()
        # Accept with probability min(1, exp(log_alpha)). A draw of exactly
        # 0.0 would crash math.log, but such a draw accepts any proposal.
        if log_alpha >= 0.0 or (u > 0.0 and math.log(u) < log_alpha):
            current = proposal
            current_log_p = proposal_log_p
            accepted += 1
        # Collect only after the burn-in period.
        if i >= burn_in:
            samples.append(current)
    mean = sum(samples) / len(samples)
    # Sample (n-1) variance with the mean computed once — O(n).
    variance = sum((x - mean) ** 2 for x in samples) / (len(samples) - 1)
    return {
        "samples": samples,
        "acceptance_rate": accepted / total,
        "mean": mean,
        "std": math.sqrt(variance)
    }
# ---- VERIFICATION ----
random.seed(42)  # fixed seed for reproducibility
# Target: standard normal distribution
# log_pdf(x) = -0.5 * x^2 (ignoring constant)
def standard_normal_log_pdf(x):
    """Unnormalized log-density of N(0, 1)."""
    return -0.5 * x ** 2
result = metropolis_hastings(
    target_log_pdf=standard_normal_log_pdf,
    initial=0.0,
    num_samples=50000,
    proposal_std=2.0,
    burn_in=5000
)
assert abs(result["mean"]) < 0.1, f"Mean should be ~0, got {result['mean']}"
assert abs(result["std"] - 1.0) < 0.15, f"Std should be ~1, got {result['std']}"
assert 0.15 < result["acceptance_rate"] < 0.70, f"Acceptance rate: {result['acceptance_rate']}"
# Target: mixture of two normals (hard to sample directly)
# p(x) proportional to exp(-0.5*(x-3)^2) + exp(-0.5*(x+3)^2)
def bimodal_log_pdf(x):
    """Log-density (up to a constant) of an equal mixture of N(3,1) and N(-3,1)."""
    # log(exp(a) + exp(b)) using log-sum-exp trick for numerical stability
    a = -0.5 * (x - 3) ** 2
    b = -0.5 * (x + 3) ** 2
    max_ab = max(a, b)
    return max_ab + math.log(math.exp(a - max_ab) + math.exp(b - max_ab))
result2 = metropolis_hastings(
    target_log_pdf=bimodal_log_pdf,
    initial=0.0,
    num_samples=50000,
    proposal_std=2.0,
    burn_in=5000
)
# Mean should be ~0 (symmetric bimodal)
assert abs(result2["mean"]) < 1.0, f"Bimodal mean should be ~0, got {result2['mean']}"
# Std should be > 1 (mass spread across two modes at +/-3)
assert result2["std"] > 2.0, f"Bimodal std should be large, got {result2['std']}"
print("All tests passed!")
Key Takeaways
- Monte Carlo convergence is O(1/sqrt(n)) — quadrupling samples halves the error
- Option pricing via Monte Carlo is the most important application in quantitative finance
- Classic puzzles (Birthday, Monty Hall, Coupon Collector) test simulation thinking
- Random walks have beautiful theoretical properties: RMS displacement scales as sqrt(n)
- Metropolis-Hastings is the foundation of modern Bayesian computation
- Always use log-probabilities in MCMC to avoid numerical underflow
Lilly Tech Systems