Intermediate

News Sentiment Analysis

Use FinBERT to score financial news headlines, aggregate daily sentiment, and analyze the correlation between news sentiment and stock price movements.

FinBERT Sentiment Scoring

# app/sentiment.py
import torch
import logging
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification

logger = logging.getLogger(__name__)


class SentimentAnalyzer:
    """Score financial text with FinBERT and aggregate the scores per day.

    The tokenizer and model weights are loaded once in ``__init__`` and
    reused for every call, so create one instance and share it.
    """

    def __init__(self):
        self.model_name = "ProsusAI/finbert"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
        # Inference only: put the model in evaluation mode.
        self.model.eval()
        # Paired positionally with the model's output probabilities in
        # ``score``, so the order must match the model's logit indices.
        # NOTE(review): verify against ``self.model.config.id2label`` if
        # ``model_name`` is ever changed.
        self.labels = ["positive", "negative", "neutral"]

    def score(self, text: str) -> dict:
        """Classify a single piece of text.

        Args:
            text: Text to score; tokenized input is truncated to 512 tokens.

        Returns:
            A dict with:
              * ``"sentiment"`` - the label with the highest probability,
              * ``"compound"`` - ``P(positive) - P(negative)``,
              * ``"scores"`` - mapping of every label to its probability.
        """
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        # No gradients are needed for inference.
        with torch.no_grad():
            outputs = self.model(**inputs)
        # A single text was passed in, so take row 0 of the batch dimension.
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0]
        scores = {label: prob.item() for label, prob in zip(self.labels, probs)}
        sentiment = max(scores, key=scores.get)
        compound = scores["positive"] - scores["negative"]
        return {"sentiment": sentiment, "compound": compound, "scores": scores}

    def score_batch(self, texts: list[str]) -> list[dict]:
        """Score each text in ``texts`` with :meth:`score`, preserving order."""
        return [self.score(t) for t in texts]

    def daily_sentiment(self, articles: list[dict]) -> pd.DataFrame:
        """Aggregate headline sentiment into one row per calendar date.

        Args:
            articles: Dicts that each provide a ``"title"`` (the text that is
                scored) and a ``"published_at"`` value parseable by
                ``pandas.to_datetime``.

        Returns:
            A DataFrame with columns ``date``, ``sentiment_mean`` (mean
            compound score), ``sentiment_count`` (number of articles) and
            ``positive_ratio`` (share of articles labelled ``"positive"``).
            When ``articles`` is empty, an empty DataFrame with these same
            columns is returned.
        """
        records = []
        for art in articles:
            result = self.score(art["title"])
            records.append({
                "date": pd.to_datetime(art["published_at"]).date(),
                "compound": result["compound"],
                "sentiment": result["sentiment"],
            })

        if not records:
            # With no rows the frame would have no "date" column and the
            # groupby below would raise KeyError; return the same schema
            # instead so downstream merges keep working.
            return pd.DataFrame(
                columns=["date", "sentiment_mean", "sentiment_count", "positive_ratio"]
            )

        df = pd.DataFrame(records)
        daily = df.groupby("date").agg(
            sentiment_mean=("compound", "mean"),
            sentiment_count=("compound", "count"),
            positive_ratio=("sentiment", lambda x: (x == "positive").mean()),
        ).reset_index()
        return daily

Correlation Analysis

# Merge sentiment with price data
def merge_sentiment_with_prices(price_df, sentiment_df):
    """Left-join daily sentiment onto price rows and report their correlation.

    A ``date`` column is derived from the index of a copy of ``price_df``
    (the caller's frame is left untouched) and used as the join key against
    ``sentiment_df``. Price rows without a matching sentiment row get ``0``
    for ``sentiment_mean`` and ``sentiment_count``. The correlation between
    ``daily_return`` and ``sentiment_mean`` is printed, and the merged frame
    is returned.
    """
    prices = price_df.copy()
    prices["date"] = prices.index.date

    merged = prices.merge(sentiment_df, how="left", on="date")

    # Dates with no matching sentiment row are filled with zero.
    for column in ("sentiment_mean", "sentiment_count"):
        merged[column] = merged[column].fillna(0)

    # Off-diagonal entry of the 2x2 correlation matrix.
    corr_matrix = merged[["daily_return", "sentiment_mean"]].corr()
    return_vs_sentiment = corr_matrix.iloc[0, 1]
    print(f"Return-Sentiment correlation: {return_vs_sentiment:.4f}")
    return merged
💡
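Sentiment lag: News sentiment can lead price movements by 1-3 days. Try lagging the sentiment features, e.g. `merged["sentiment_mean"].shift(1)` through `.shift(3)`, so that each row pairs a day's return with sentiment from the preceding days, and check whether the lagged features improve your model.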

Key Takeaways

  • FinBERT is specifically trained on financial text and outperforms general sentiment models on stock news.
  • Daily aggregation smooths noise from individual headline scores.
  • Sentiment-price correlation varies by stock: it tends to be higher for meme stocks and lower for stable large-cap companies.
  • Lag analysis reveals whether sentiment predicts or reacts to price movements.