Advanced
Backtesting & Evaluation
Evaluate your model with walk-forward validation, calculate key financial metrics (Sharpe ratio, maximum drawdown, win rate), and compare against buy-and-hold benchmarks.
Backtesting Engine
# app/backtester.py
import numpy as np
import pandas as pd
class Backtester:
    """Simulate a long-only trading strategy driven by price predictions.

    The strategy goes all-in on a buy signal and fully liquidates on a
    sell signal; it never shorts and never holds a partial position.
    """

    def __init__(self, initial_capital=10000):
        # Starting cash for every simulation run.
        self.initial_capital = initial_capital

    def run(self, actual_prices, predicted_prices, threshold=0.01):
        """Run the simulation and return per-period results.

        Args:
            actual_prices: sequence of observed prices, one per period.
            predicted_prices: model predictions aligned with actual_prices.
            threshold: minimum predicted fractional move required to trade.

        Returns:
            DataFrame with columns actual_price, predicted_price,
            signal (1=buy, -1=sell, 0=hold) and portfolio_value.

        Raises:
            ValueError: if the two price sequences differ in length.
        """
        if len(actual_prices) != len(predicted_prices):
            raise ValueError("actual_prices and predicted_prices must be the same length")

        # Signal = predicted fractional move relative to the current actual price.
        signals = []
        for actual, predicted in zip(actual_prices, predicted_prices):
            pct_change = (predicted - actual) / actual
            if pct_change > threshold:
                signals.append(1)   # Buy
            elif pct_change < -threshold:
                signals.append(-1)  # Sell
            else:
                signals.append(0)   # Hold

        # Replay the signals: all-in on buy, full liquidation on sell.
        capital = self.initial_capital
        position = 0.0  # number of shares currently held
        portfolio = []
        for price, signal in zip(actual_prices, signals):
            if signal == 1 and position == 0:
                position = capital / price
                capital = 0
            elif signal == -1 and position > 0:
                capital = position * price
                position = 0.0
            # Mark the portfolio to market every period, traded or not.
            portfolio.append(capital + position * price)

        return pd.DataFrame({
            "actual_price": actual_prices,
            "predicted_price": predicted_prices,
            "signal": signals,
            "portfolio_value": portfolio,
        })

    def calculate_metrics(self, results: pd.DataFrame) -> dict:
        """Compute performance metrics from a ``run()`` results DataFrame.

        Returns a dict with total return, annualized Sharpe ratio, max
        drawdown, win rate, executed trade count, buy-and-hold benchmark
        return, and alpha (strategy return minus benchmark), all rounded.

        Raises:
            ValueError: if ``results`` is empty.
        """
        if results.empty:
            raise ValueError("results is empty; run the backtest first")

        portfolio = results["portfolio_value"]
        returns = portfolio.pct_change().dropna()
        total_return = (portfolio.iloc[-1] / portfolio.iloc[0] - 1) * 100
        # Annualized Sharpe assuming 252 trading days; 0 when returns are flat
        # (the NaN std of a single return also fails the > 0 test safely).
        sharpe = (returns.mean() / returns.std()) * np.sqrt(252) if returns.std() > 0 else 0
        # Worst peak-to-trough decline, expressed as a negative percentage.
        max_dd = ((portfolio / portfolio.cummax()) - 1).min() * 100
        win_rate = (returns > 0).sum() / len(returns) * 100 if len(returns) > 0 else 0

        # Buy-and-hold benchmark over the same window.
        prices = results["actual_price"]
        bh_return = (prices.iloc[-1] / prices.iloc[0] - 1) * 100

        # Count trades that actually executed: a buy only opens a position
        # when flat, a sell only closes an open one.  Counting every
        # non-zero signal (the previous behavior) overstates turnover.
        num_trades = 0
        in_position = False
        for signal in results["signal"]:
            if signal == 1 and not in_position:
                in_position = True
                num_trades += 1
            elif signal == -1 and in_position:
                in_position = False
                num_trades += 1

        return {
            "total_return_pct": round(total_return, 2),
            "sharpe_ratio": round(sharpe, 3),
            "max_drawdown_pct": round(max_dd, 2),
            "win_rate_pct": round(win_rate, 1),
            "num_trades": num_trades,
            "buy_hold_return_pct": round(bh_return, 2),
            "alpha": round(total_return - bh_return, 2),
        }
Walk-Forward Validation
def walk_forward_validation(df, predictor, window=252, step=21, epochs=20):
    """Evaluate ``predictor`` with rolling train/test splits.

    For each fold, the predictor is retrained on the ``window`` rows
    immediately preceding a ``step``-row test slice, so every prediction
    is made with past data only (no look-ahead).

    Args:
        df: DataFrame with at least a "Close" column, in time order.
        predictor: object exposing ``train(df, epochs=...)`` and
            ``predict(df)``.
        window: number of rows in each training slice.
        step: number of rows in each test slice and the stride between folds.
        epochs: training epochs per fold (previously hard-coded to 20).

    Returns:
        List of dicts with "actual" (close prices aligned to the
        predictions) and "predicted" (the model's outputs) per fold.
    """
    all_results = []
    # The +1 keeps the final full test window; the exclusive range
    # endpoint used to drop it whenever step divided len(df) - window.
    for start in range(0, len(df) - window - step + 1, step):
        train = df.iloc[start:start + window]
        test = df.iloc[start + window:start + window + step]
        predictor.train(train, epochs=epochs)
        preds = predictor.predict(test)
        # Align actuals with the tail of the test slice in case the
        # predictor drops warm-up rows.  Using an explicit non-negative
        # start index avoids the [-0:] quirk that returned the whole
        # array when preds was empty.
        all_results.append({
            "actual": test["Close"].values[len(test) - len(preds):],
            "predicted": preds,
        })
    return all_results
Sharpe ratio benchmark: A Sharpe ratio above 1.0 is considered good, above 2.0 is very good, and above 3.0 is excellent. Most quantitative funds target Sharpe ratios of 1.5-2.5. If your model shows a Sharpe above 3.0, verify there is no data leakage.
Key Takeaways
- Walk-forward validation prevents look-ahead bias by training only on past data.
- Sharpe ratio measures risk-adjusted returns: higher is better.
- Maximum drawdown shows the worst peak-to-trough decline in portfolio value.
- Always compare against buy-and-hold to see if the model adds value over passive investing.
Lilly Tech Systems