Text Embeddings
Learn to create text embeddings using the most popular APIs and open-source models, with complete Python code examples for each provider.
OpenAI Embeddings
OpenAI offers the most widely used embedding models. They are fast, high-quality, and easy to use via API.
from openai import OpenAI

# The client reads OPENAI_API_KEY from the environment.
client = OpenAI()

# --- Embed one piece of text ---
single = client.embeddings.create(
    model="text-embedding-3-small",  # 1536 dimensions, cheapest
    # model="text-embedding-3-large",  # 3072 dimensions, highest quality
    input="Vector databases store high-dimensional embeddings",
)
embedding = single.data[0].embedding
print(f"Dimensions: {len(embedding)}")  # 1536
print(f"First 5 values: {embedding[:5]}")

# --- Embed several texts in one call (up to 2048 texts per request) ---
sentences = [
    "Python is a programming language",
    "Machine learning uses algorithms",
    "The weather is sunny today",
]
batch = client.embeddings.create(model="text-embedding-3-small", input=sentences)
embeddings = [row.embedding for row in batch.data]
print(f"Embedded {len(embeddings)} texts")
💡 Tip: Use the dimensions parameter to reduce output size. text-embedding-3-small can be reduced from 1536 to 512 or 256 with minimal quality loss, saving storage and compute.
Voyage AI Embeddings
Voyage AI specializes in embeddings and often outperforms OpenAI on retrieval benchmarks, especially for code and domain-specific content.
import voyageai

# The client reads VOYAGE_API_KEY from the environment.
client = voyageai.Client()

# Index-time embedding: input_type="document" is used when storing texts.
doc_result = client.embed(
    model="voyage-3",  # General purpose, 1024 dims
    # model="voyage-code-3",  # Optimized for code
    texts=["Vector databases enable similarity search"],
    input_type="document",
)
embedding = doc_result.embeddings[0]
print(f"Dimensions: {len(embedding)}")  # 1024

# Search-time embedding: input_type="query" applies a different prompt,
# which matters for asymmetric (query vs. document) retrieval.
query_result = client.embed(
    texts=["How do vector databases work?"],
    model="voyage-3",
    input_type="query",
)
Cohere Embed
import cohere

# The client reads CO_API_KEY from the environment.
co = cohere.ClientV2()

# Index documents with input_type="search_document".
response = co.embed(
    model="embed-english-v3.0",
    texts=["Embeddings capture semantic meaning"],
    input_type="search_document",
    embedding_types=["float"],
)
embedding = response.embeddings.float_[0]
print(f"Dimensions: {len(embedding)}")  # 1024

# Embed search queries with input_type="search_query".
query_response = co.embed(
    model="embed-english-v3.0",
    texts=["What are embeddings used for?"],
    input_type="search_query",
    embedding_types=["float"],
)
Google Gemini Embeddings
import google.generativeai as genai

genai.configure(api_key="your-api-key")

# task_type distinguishes stored documents ("retrieval_document")
# from search queries ("retrieval_query") for asymmetric retrieval.
result = genai.embed_content(
    model="models/text-embedding-004",
    content="Semantic search finds documents by meaning",
    task_type="retrieval_document",
)
embedding = result["embedding"]
print(f"Dimensions: {len(embedding)}")  # 768
Open-Source: Sentence Transformers
Sentence Transformers is the most popular open-source library for generating embeddings. Models run locally — no API key or cost per request.
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim

# Model weights are downloaded automatically on first use.
model = SentenceTransformer("all-MiniLM-L6-v2")  # 384 dims, very fast
# model = SentenceTransformer("all-mpnet-base-v2")  # 768 dims, better quality
# model = SentenceTransformer("BAAI/bge-large-en-v1.5")  # 1024 dims, top quality

sentences = [
    "Vector databases store embeddings",
    "Similarity search finds related items",
    "The cat sat on the mat",
]
embeddings = model.encode(sentences)
print(f"Shape: {embeddings.shape}")  # (3, 384)

# Pairwise cosine similarity between every sentence pair.
similarities = cos_sim(embeddings, embeddings)
print(similarities)
Open-Source: E5 and BGE Models
from sentence_transformers import SentenceTransformer

# E5 models (Microsoft) require a "query: " or "passage: " prefix on every input.
e5 = SentenceTransformer("intfloat/e5-large-v2")
query_emb = e5.encode(["query: How do embeddings work?"])
doc_emb = e5.encode(["passage: Embeddings convert text to vectors..."])

# BGE models (BAAI): queries use the instruction prefix
# "Represent this sentence for searching relevant passages: " (the short
# "Represent this sentence: " form is incorrect for bge-*-en-v1.5);
# passages/documents are embedded with no prefix.
bge = SentenceTransformer("BAAI/bge-large-en-v1.5")
query_emb = bge.encode([
    "Represent this sentence for searching relevant passages: How do embeddings work?"
])
doc_emb = bge.encode(["Embeddings convert text to vectors..."])
Batch Embedding Best Practices
from openai import OpenAI

client = OpenAI()  # Uses OPENAI_API_KEY env variable


def batch_embed(texts, model="text-embedding-3-small", batch_size=2048):
    """Embed texts in batches to respect API limits.

    Args:
        texts: List of strings to embed.
        model: OpenAI embedding model name.
        batch_size: Maximum texts per request (the API caps input at 2048).

    Returns:
        List of embedding vectors, in the same order as `texts`.
    """
    all_embeddings = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        response = client.embeddings.create(input=batch, model=model)
        all_embeddings.extend(item.embedding for item in response.data)
        print(f"Embedded batch {start // batch_size + 1} ({len(batch)} texts)")
    return all_embeddings


# Embed 10,000 documents efficiently. (The original snippet referenced an
# undefined `documents` name; define a placeholder corpus so it actually runs.)
documents = [f"Document {i}" for i in range(10_000)]
embeddings = batch_embed(documents, batch_size=2048)
Caching Strategies
Embedding the same text twice wastes money and time. Cache embeddings to avoid redundant API calls:
import hashlib
import json
import os

CACHE_DIR = "./embedding_cache"
os.makedirs(CACHE_DIR, exist_ok=True)


def get_embedding_cached(text, model="text-embedding-3-small"):
    """Get an embedding with disk-based caching.

    Args:
        text: The text to embed.
        model: OpenAI embedding model name (part of the cache key, so the
            same text embedded with two models is cached separately).

    Returns:
        The embedding vector as a list of floats.
    """
    # Hash model + text so the cache key is filesystem-safe and fixed-length.
    cache_key = hashlib.sha256(f"{model}:{text}".encode()).hexdigest()
    cache_path = os.path.join(CACHE_DIR, f"{cache_key}.json")

    # Cache hit: no API call (and no API key) needed.
    if os.path.exists(cache_path):
        with open(cache_path, "r") as f:
            return json.load(f)

    # Cache miss: call the API. The client is created lazily here — the
    # original snippet used an undefined `client`, which raised a NameError
    # when the snippet was run standalone.
    from openai import OpenAI
    client = OpenAI()  # Uses OPENAI_API_KEY env variable
    response = client.embeddings.create(input=[text], model=model)
    embedding = response.data[0].embedding

    # Persist for next time.
    with open(cache_path, "w") as f:
        json.dump(embedding, f)
    return embedding
💡 Try It Yourself
Choose any embedding provider above. Embed the same 5 sentences with two different models and compare the cosine similarities. Do higher-dimensional models produce more differentiated similarity scores?
Lilly Tech Systems