Advanced

Backtesting & Evaluation

Evaluate your model with walk-forward validation, calculate key financial metrics (Sharpe ratio, maximum drawdown, win rate), and compare against buy-and-hold benchmarks.

Backtesting Engine

# app/backtester.py
import numpy as np
import pandas as pd


class Backtester:
    """Simulate a long-only trading strategy driven by price predictions.

    A buy/sell/hold signal is generated each step by comparing the model's
    predicted price to the current actual price. Capital is fully invested
    on a buy and fully liquidated on a sell — no shorting, no partial
    positions, no transaction costs.
    """

    def __init__(self, initial_capital=10000):
        # Starting cash, in the same currency units as the price series.
        self.initial_capital = initial_capital

    def run(self, actual_prices, predicted_prices, threshold=0.01):
        """Run the simulation and return per-step results.

        Args:
            actual_prices: sequence of observed prices, one per step.
            predicted_prices: model predictions aligned with actual_prices.
            threshold: minimum predicted fractional move required to
                trigger a buy (above) or sell (below the negative).

        Returns:
            DataFrame with columns ``actual_price``, ``predicted_price``,
            ``signal`` (1=buy, -1=sell, 0=hold) and ``portfolio_value``
            (cash plus holdings marked to market each step).
        """
        signals = []
        for i in range(len(predicted_prices)):
            # Predicted fractional move relative to the current actual price.
            pct_change = (predicted_prices[i] - actual_prices[i]) / actual_prices[i]
            if pct_change > threshold:
                signals.append(1)    # Buy
            elif pct_change < -threshold:
                signals.append(-1)   # Sell
            else:
                signals.append(0)    # Hold

        capital = self.initial_capital
        position = 0  # number of shares currently held
        portfolio = []

        for i in range(len(signals)):
            if signals[i] == 1 and position == 0:
                # Go all-in: convert every unit of cash into shares.
                position = capital / actual_prices[i]
                capital = 0
            elif signals[i] == -1 and position > 0:
                # Liquidate the entire position back into cash.
                capital = position * actual_prices[i]
                position = 0
            # Mark to market every step, whether or not a trade executed.
            portfolio.append(capital + position * actual_prices[i])

        return pd.DataFrame({
            "actual_price": actual_prices,
            "predicted_price": predicted_prices,
            "signal": signals,
            "portfolio_value": portfolio,
        })

    def calculate_metrics(self, results: pd.DataFrame) -> dict:
        """Compute summary performance metrics from ``run`` output.

        Args:
            results: DataFrame produced by :meth:`run`.

        Returns:
            Dict of rounded metrics: total return %, annualized Sharpe
            ratio (252 trading days, zero risk-free rate), max drawdown %,
            win rate %, executed trade count, buy-and-hold benchmark
            return %, and alpha (strategy return minus benchmark).
        """
        # Guard degenerate input: an empty frame has no first/last row
        # and would otherwise raise IndexError on .iloc.
        if results.empty:
            return {
                "total_return_pct": 0.0,
                "sharpe_ratio": 0.0,
                "max_drawdown_pct": 0.0,
                "win_rate_pct": 0.0,
                "num_trades": 0,
                "buy_hold_return_pct": 0.0,
                "alpha": 0.0,
            }

        portfolio = results["portfolio_value"]
        returns = portfolio.pct_change().dropna()

        total_return = (portfolio.iloc[-1] / portfolio.iloc[0] - 1) * 100
        # Annualized Sharpe assuming daily bars and a zero risk-free rate.
        sharpe = (returns.mean() / returns.std()) * np.sqrt(252) if returns.std() > 0 else 0
        # Worst peak-to-trough decline of the equity curve.
        max_dd = ((portfolio / portfolio.cummax()) - 1).min() * 100
        win_rate = (returns > 0).sum() / len(returns) * 100 if len(returns) > 0 else 0

        # Fix: count trades that actually EXECUTED, not raw signals.
        # A sell signal with no open position, or a buy signal while
        # already invested, is a no-op in run() and must not be counted.
        executed = 0
        holding = False
        for s in results["signal"]:
            if s == 1 and not holding:
                holding = True
                executed += 1
            elif s == -1 and holding:
                holding = False
                executed += 1

        # Buy-and-hold benchmark over the same period.
        prices = results["actual_price"]
        bh_return = (prices.iloc[-1] / prices.iloc[0] - 1) * 100

        return {
            "total_return_pct": round(total_return, 2),
            "sharpe_ratio": round(sharpe, 3),
            "max_drawdown_pct": round(max_dd, 2),
            "win_rate_pct": round(win_rate, 1),
            "num_trades": executed,
            "buy_hold_return_pct": round(bh_return, 2),
            "alpha": round(total_return - bh_return, 2),
        }

Walk-Forward Validation

def walk_forward_validation(df, predictor, window=252, step=21):
    """Evaluate *predictor* with rolling train/test windows.

    Each fold trains on ``window`` consecutive rows and predicts the
    following ``step`` rows, then slides forward by ``step`` — the model
    only ever sees past data, preventing look-ahead bias.

    Args:
        df: time-ordered DataFrame with at least a "Close" column.
        predictor: object exposing ``train(df, epochs=...)`` and
            ``predict(df)``.
        window: rows per training window (252 ~ one trading year).
        step: rows predicted per fold (21 ~ one trading month).

    Returns:
        List of per-fold dicts with "actual" and "predicted" arrays.
    """
    all_results = []
    # Fix: "+ 1" so the final fold is not silently dropped when the data
    # length exactly fits another train/test split (the original stop
    # bound excluded it).
    for start in range(0, len(df) - window - step + 1, step):
        train = df.iloc[start:start + window]
        test = df.iloc[start + window:start + window + step]
        predictor.train(train, epochs=20)
        preds = predictor.predict(test)
        all_results.append({
            # Align actuals with however many predictions were produced
            # (some models consume a lookback and emit fewer outputs).
            "actual": test["Close"].values[-len(preds):],
            "predicted": preds,
        })
    return all_results
💡
Sharpe ratio benchmark: A Sharpe ratio above 1.0 is considered good, above 2.0 is very good, and above 3.0 is excellent. Most quantitative funds target Sharpe ratios of 1.5-2.5. If your model shows a Sharpe above 3.0, verify there is no data leakage.

Key Takeaways

  • Walk-forward validation prevents look-ahead bias by training only on past data.
  • Sharpe ratio measures risk-adjusted returns: higher is better.
  • Maximum drawdown shows the worst peak-to-trough decline in portfolio value.
  • Always compare against buy-and-hold to see if the model adds value over passive investing.