Intermediate
News Sentiment Analysis
Use FinBERT to score financial news headlines, aggregate daily sentiment, and analyze the correlation between news sentiment and stock price movements.
FinBERT Sentiment Scoring
# app/sentiment.py
import torch
import logging
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
logger = logging.getLogger(__name__)
class SentimentAnalyzer:
    """Score financial text with FinBERT and aggregate the scores per day.

    The tokenizer and classification model are loaded once in ``__init__``
    from the ``ProsusAI/finbert`` checkpoint and reused for every call.
    """

    def __init__(self):
        self.model_name = "ProsusAI/finbert"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
        # Inference only: switch the module to evaluation mode.
        self.model.eval()
        # Position i names the class of output logit i.
        # NOTE(review): this order is assumed to match the checkpoint's
        # id2label mapping -- confirm against the model config.
        self.labels = ["positive", "negative", "neutral"]

    def score(self, text: str) -> dict:
        """Score a single piece of text.

        Args:
            text: Text to classify; tokens beyond 512 are truncated.

        Returns:
            A dict with ``"sentiment"`` (the highest-probability label),
            ``"compound"`` (positive minus negative probability) and
            ``"scores"`` (label -> probability).
        """
        return self.score_batch([text])[0]

    def score_batch(self, texts: list[str], batch_size: int = 32) -> list[dict]:
        """Score several texts, running the model on padded mini-batches.

        Args:
            texts: Texts to classify; each is truncated to 512 tokens.
            batch_size: Maximum number of texts per forward pass.

        Returns:
            One result dict per input text, in input order, shaped like the
            return value of ``score``. An empty input yields an empty list.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        texts = list(texts)
        results = []
        for start in range(0, len(texts), batch_size):
            chunk = texts[start:start + batch_size]
            # padding=True pads every text in the chunk to the longest one so
            # they can be stacked into a single tensor.
            inputs = self.tokenizer(
                chunk,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512,
            )
            with torch.no_grad():
                outputs = self.model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
            for row in probs.tolist():
                scores = dict(zip(self.labels, row))
                results.append({
                    "sentiment": max(scores, key=scores.get),
                    "compound": scores["positive"] - scores["negative"],
                    "scores": scores,
                })
        return results

    def daily_sentiment(self, articles: list[dict]) -> pd.DataFrame:
        """Aggregate headline sentiment into one row per calendar date.

        Args:
            articles: Dicts with a ``"title"`` (the text that is scored) and
                a ``"published_at"`` value parseable by ``pd.to_datetime``.

        Returns:
            A DataFrame with columns ``date``, ``sentiment_mean`` (mean
            compound score), ``sentiment_count`` (number of articles) and
            ``positive_ratio`` (share of articles labelled positive). With no
            articles, an empty DataFrame with those columns is returned.
        """
        articles = list(articles)
        if not articles:
            # Grouping a column-less frame by "date" would raise KeyError.
            return pd.DataFrame(
                columns=["date", "sentiment_mean", "sentiment_count", "positive_ratio"]
            )

        results = self.score_batch([art["title"] for art in articles])
        records = [
            {
                "date": pd.to_datetime(art["published_at"]).date(),
                "compound": result["compound"],
                "sentiment": result["sentiment"],
            }
            for art, result in zip(articles, results)
        ]
        df = pd.DataFrame(records)
        daily = df.groupby("date").agg(
            sentiment_mean=("compound", "mean"),
            sentiment_count=("compound", "count"),
            positive_ratio=("sentiment", lambda x: (x == "positive").mean()),
        ).reset_index()
        return daily
Correlation Analysis
# Merge sentiment with price data
def merge_sentiment_with_prices(price_df, sentiment_df):
    """Left-join daily sentiment onto price rows and report their correlation.

    A ``date`` column is derived from ``price_df.index.date`` and used as the
    join key, so ``price_df`` needs a datetime-like index; the caller's frame
    is not modified. Rows without a sentiment match get ``sentiment_mean``
    and ``sentiment_count`` set to 0. The correlation between
    ``daily_return`` and ``sentiment_mean`` is printed.

    Returns:
        The merged DataFrame.
    """
    dated_prices = price_df.assign(date=price_df.index.date)
    merged = dated_prices.merge(sentiment_df, on="date", how="left")

    # Days with no matching sentiment row are filled with 0.
    for column in ("sentiment_mean", "sentiment_count"):
        merged[column] = merged[column].fillna(0)

    corr = merged[["daily_return", "sentiment_mean"]].corr()
    print(f"Return-Sentiment correlation: {corr.iloc[0, 1]:.4f}")
    return merged
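Sentiment lag: News sentiment may lead price movements by 1-3 days. To test this, lag the sentiment features so that each day's return is paired with sentiment from earlier days (for example, merged["sentiment_mean"].shift(1) for a one-day lag), and check whether the correlation with returns strengthens.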
Key Takeaways
- FinBERT is specifically trained on financial text and outperforms general sentiment models on stock news.
- Daily aggregation smooths noise from individual headline scores.
- Sentiment-price correlation varies by stock: high for meme stocks, lower for large-cap stable companies.
- Lag analysis reveals whether sentiment predicts or reacts to price movements.
Lilly Tech Systems