Intermediate

Feature Definitions

Define Feast entities, feature views, data sources, and feature engineering pipelines.

Feature Definitions

Feast uses Python code to define features declaratively. These definitions are the single source of truth for all features in your platform.

# feature_repo/features.py
from feast import Entity, FeatureView, Field, FileSource
from feast.types import Float32, Int64
from datetime import timedelta

# Entity: the key that feature values are looked up by
driver = Entity(
    description="Unique driver identifier",
    join_keys=["driver_id"],
    name="driver_id",
)

# Data source: the Parquet file that holds the raw driver statistics
driver_stats_source = FileSource(
    created_timestamp_column="created",
    timestamp_field="event_timestamp",
    path="data/driver_stats.parquet",
)

# Feature view: the driver statistics served together from one source
driver_stats_fv = FeatureView(
    name="driver_stats",
    source=driver_stats_source,
    entities=[driver],
    schema=[
        Field(name="conv_rate", dtype=Float32),
        Field(name="acc_rate", dtype=Float32),
        Field(name="avg_daily_trips", dtype=Int64),
    ],
    ttl=timedelta(days=30),
    tags={"team": "driver_performance"},
    online=True,
)

# On-demand feature: computed at request time
from feast import on_demand_feature_view, RequestSource

# Request source: declares the trip_distance field expected with each request
input_request = RequestSource(
    schema=[Field(name="trip_distance", dtype=Float32)],
    name="trip_request",
)

@on_demand_feature_view(
    sources=[driver_stats_fv, input_request],
    schema=[Field(name="trip_score", dtype=Float32)],
)
def trip_score(inputs):
    """Compute a weighted trip score from the driver statistics.

    Args:
        inputs: Frame with ``conv_rate``, ``acc_rate`` and
            ``avg_daily_trips`` columns (assumed to be a pandas DataFrame,
            Feast's default on-demand mode -- confirm if another mode is
            configured).

    Returns:
        A single-column frame named ``trip_score`` that shares the index
        of ``inputs``.
    """
    # Build the score as a new Series instead of assigning a column on
    # ``inputs``: the argument is not a private copy, so writing to it would
    # leak an extra ``trip_score`` column into the caller's frame.
    score = (
        inputs["conv_rate"] * 0.5
        + inputs["acc_rate"] * 0.3
        + (inputs["avg_daily_trips"] / 50) * 0.2
    )
    # NOTE(review): ``trip_distance`` from the request source is declared but
    # not used in the formula -- confirm whether it should contribute.
    return score.to_frame(name="trip_score")

Apply Feature Definitions

# Run from inside the feature repository so Feast picks up its definitions
cd feature_repo
feast apply
# Expected output:
# Created entity driver_id
# Created feature view driver_stats
# Created on demand feature view trip_score

Verify Registry

# List the registered feature views, then the registered entities
feast feature-views list
feast entities list
📦
Key concepts:
Entity = primary key (e.g., user_id, driver_id)
Feature View = group of features from one data source
On-Demand Feature = computed at request time from other features and request data