Skip to content

Commit d9af218

Browse files
committed
feat(cli): Add recommendation template for feast init (#5478)
Add a new "recommendation" template demonstrating product recommendations using vector similarity search with pre-computed embeddings (all-MiniLM-L6-v2) and SQLite online store with vector_enabled.
1 parent 1e1f5d9 commit d9af218

7 files changed

Lines changed: 277 additions & 0 deletions

File tree

sdk/python/feast/cli/cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,7 @@ def materialize_incremental_command(ctx: click.Context, end_ts: str, views: List
447447
"ray",
448448
"ray_rag",
449449
"pytorch_nlp",
450+
"recommendation",
450451
],
451452
case_sensitive=False,
452453
),

sdk/python/feast/templates/recommendation/__init__.py

Whitespace-only changes.
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
from feast.file_utils import replace_str_in_file
2+
3+
4+
def bootstrap():
    """Generate the sample data for the recommendation template.

    Called automatically by init_repo() during `feast init`.

    The function builds a small product catalog, embeds each product's
    "name. description" text with the ``all-MiniLM-L6-v2`` sentence
    transformer, writes the result to ``feature_repo/data/products.parquet``
    next to this file, and fills in the ``%PROJECT_NAME%`` and
    ``%PARQUET_PATH%`` placeholders in ``feature_repo/feature_definitions.py``.

    Raises:
        SystemExit: if ``sentence_transformers`` cannot be imported.
    """
    import pathlib
    from datetime import datetime, timedelta

    import numpy as np
    import pandas as pd

    try:
        from sentence_transformers import SentenceTransformer
    except ImportError:
        raise SystemExit(
            "sentence-transformers is required for this template: "
            "pip install sentence-transformers"
        )

    # Everything is resolved relative to the directory holding this file; the
    # directory name doubles as the project name.
    template_dir = pathlib.Path(__file__).parent.absolute()
    repo_path = template_dir / "feature_repo"
    project_name = template_dir.name
    data_dir = repo_path / "data"
    data_dir.mkdir(exist_ok=True)

    # One tuple per product, in the same order as ``column_names``.
    column_names = [
        "product_id",
        "product_name",
        "description",
        "category",
        "price",
        "rating",
    ]
    catalog = [
        (
            "P001",
            "Wireless Noise-Cancelling Headphones",
            "Premium over-ear headphones with active noise cancellation and 30-hour battery life.",
            "Electronics",
            299.99,
            4.7,
        ),
        (
            "P002",
            "Bluetooth Portable Speaker",
            "Waterproof portable speaker with deep bass and 12-hour playtime.",
            "Electronics",
            79.99,
            4.5,
        ),
        (
            "P003",
            "Mechanical Gaming Keyboard",
            "RGB mechanical keyboard with Cherry MX switches and programmable keys.",
            "Electronics",
            149.99,
            4.6,
        ),
        (
            "P004",
            "Ergonomic Wireless Mouse",
            "Vertical ergonomic mouse designed to reduce wrist strain.",
            "Electronics",
            49.99,
            4.3,
        ),
        (
            "P005",
            "Python Machine Learning Cookbook",
            "Practical recipes for building ML models with scikit-learn and TensorFlow.",
            "Books",
            39.99,
            4.4,
        ),
        (
            "P006",
            "Data Engineering Fundamentals",
            "Comprehensive guide to building modern data pipelines and architectures.",
            "Books",
            44.99,
            4.6,
        ),
        (
            "P007",
            "Introduction to Deep Learning",
            "Beginner-friendly deep learning textbook with hands-on PyTorch examples.",
            "Books",
            54.99,
            4.7,
        ),
        (
            "P008",
            "Trail Running Shoes",
            "Lightweight trail running shoes with superior grip and cushioning.",
            "Sports",
            129.99,
            4.6,
        ),
        (
            "P009",
            "Premium Yoga Mat",
            "Non-slip extra-thick yoga mat with carrying strap.",
            "Sports",
            34.99,
            4.4,
        ),
        (
            "P010",
            "Resistance Bands Set",
            "Set of 5 resistance bands with varying tension levels for home workouts.",
            "Sports",
            24.99,
            4.3,
        ),
        (
            "P011",
            "Robot Vacuum Cleaner",
            "Self-navigating robot vacuum with app control and auto-charging.",
            "Home",
            349.99,
            4.5,
        ),
        (
            "P012",
            "Air Purifier with HEPA Filter",
            "Room air purifier with true HEPA filter and air quality sensor.",
            "Home",
            199.99,
            4.6,
        ),
    ]

    # Embed "<name>. <description>" for every product in a single batch.
    encoder = SentenceTransformer("all-MiniLM-L6-v2")
    texts = [f"{name}. {desc}" for _pid, name, desc, *_rest in catalog]
    vectors = encoder.encode(texts, normalize_embeddings=True)

    # Event timestamps start 15 days before the current (hour-truncated) time
    # and advance one hour per product.
    created_at = datetime.now().replace(microsecond=0, second=0, minute=0)
    first_event = created_at - timedelta(days=15)
    event_times = [
        first_event + timedelta(hours=offset) for offset, _ in enumerate(catalog)
    ]

    frame = pd.DataFrame(catalog, columns=column_names)
    for float_col in ("price", "rating"):
        frame[float_col] = frame[float_col].astype(np.float32)
    frame["embedding"] = [vector.tolist() for vector in vectors]
    frame["event_timestamp"] = event_times
    frame["created"] = created_at

    parquet_file = data_dir / "products.parquet"
    frame.to_parquet(path=str(parquet_file), allow_truncated_timestamps=True)

    # Fill in the template placeholders; the parquet path is written relative
    # to the feature repo directory.
    definitions_file = repo_path / "feature_definitions.py"
    substitutions = {
        "%PROJECT_NAME%": str(project_name),
        "%PARQUET_PATH%": str(parquet_file.relative_to(repo_path)),
    }
    for placeholder, value in substitutions.items():
        replace_str_in_file(definitions_file, placeholder, value)
156+
157+
158+
if __name__ == "__main__":
159+
bootstrap()

sdk/python/feast/templates/recommendation/feature_repo/__init__.py

Whitespace-only changes.
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from datetime import timedelta
2+
3+
from feast import Entity, FeatureService, FeatureView, Field, FileSource, Project
4+
from feast.types import Array, Float32, String
5+
6+
# Project declaration. NOTE(review): %PROJECT_NAME% is a template placeholder,
# presumably substituted when the template is instantiated — confirm.
project = Project(
    name="%PROJECT_NAME%",
    description="A project for product recommendations using vector similarity search",
)

# Products are keyed by their product_id column.
product = Entity(name="product", join_keys=["product_id"])

# Parquet source with pre-computed embeddings. Replace with BigQuery,
# Snowflake, etc. for production use cases.
products_source = FileSource(
    name="products_source",
    path="%PARQUET_PATH%",
    timestamp_field="event_timestamp",
    created_timestamp_column="created",
)

# Columns exposed by the feature view. The embedding is generated by
# all-MiniLM-L6-v2 (384 dimensions) and is the vector-indexed field.
_product_fields = [
    Field(
        name="embedding",
        dtype=Array(Float32),
        vector_index=True,
        vector_length=384,
    ),
    Field(name="product_name", dtype=String),
    Field(name="description", dtype=String),
    Field(name="category", dtype=String),
    Field(name="price", dtype=Float32),
    Field(name="rating", dtype=Float32),
]

product_embeddings = FeatureView(
    name="product_embeddings",
    entities=[product],
    ttl=timedelta(days=365),
    schema=_product_fields,
    online=True,
    source=products_source,
    tags={"team": "recommendations"},
)

# Feature service bundling the whole product_embeddings view.
recommendation_service = FeatureService(
    name="recommendation_service",
    features=[product_embeddings],
)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
project: my_project
2+
# By default, the registry is a file (but can be turned into a more scalable SQL-backed registry)
3+
registry: data/registry.db
4+
# The provider primarily specifies the default offline / online stores and where the registry is stored in a given cloud
5+
provider: local
6+
online_store:
7+
type: sqlite
8+
path: data/online_store.db
9+
vector_enabled: true
10+
entity_key_serialization_version: 3
11+
# Authentication and authorization default to no_auth; other possible values are kubernetes and oidc. Refer to the documentation for more details.
12+
auth:
13+
type: no_auth
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import subprocess
2+
import sys
3+
from datetime import datetime
4+
5+
from feast import FeatureStore
6+
7+
8+
def run_demo():
    """Run the product-recommendation demo end to end in the current repo.

    Steps, in order:
      1. ``feast apply`` to register the definitions.
      2. Materialize features into the online store up to the current time.
      3. Embed a sample query with ``all-MiniLM-L6-v2`` and retrieve the top 5
         matches via ``retrieve_online_documents_v2``.
      4. Print the matches, then ``feast teardown``.

    The sentence-transformers import is verified before anything is applied,
    and teardown runs in a ``finally`` block, so a missing dependency or a
    failure mid-demo does not leave applied infrastructure behind.

    Raises:
        SystemExit: with status 1 if ``sentence_transformers`` is not installed.
        subprocess.CalledProcessError: if ``feast apply`` exits non-zero.
    """
    # Fail fast: check the optional dependency before creating any state.
    try:
        from sentence_transformers import SentenceTransformer
    except ImportError:
        print("sentence-transformers is required: pip install sentence-transformers")
        sys.exit(1)

    store = FeatureStore(repo_path=".")

    print("\n--- Run feast apply ---")
    # Nothing below can work without a successful apply, so surface failures.
    subprocess.run(["feast", "apply"], check=True)

    try:
        print("\n--- Load features into online store ---")
        store.materialize_incremental(end_date=datetime.now())

        print("\n--- Product Recommendation Search ---")
        model = SentenceTransformer("all-MiniLM-L6-v2")

        query = "gaming laptop accessories"
        print(f"\n Query: '{query}'")
        # encode() takes a batch; take the single resulting vector as a list.
        query_embedding = model.encode([query], normalize_embeddings=True)[0].tolist()

        results = store.retrieve_online_documents_v2(
            features=[
                "product_embeddings:embedding",
                "product_embeddings:product_name",
                "product_embeddings:category",
                "product_embeddings:price",
                "product_embeddings:rating",
            ],
            query=query_embedding,
            top_k=5,
        ).to_dict()

        product_ids = results.get("product_id", []) if results else []
        if len(product_ids) > 0:
            print(f" Top {len(product_ids)} recommendations:")
            rows = zip(
                results["product_name"],
                results["category"],
                results["price"],
                results["rating"],
            )
            for rank, (name, category, price, rating) in enumerate(rows, start=1):
                print(f" {rank}. {name} [{category}] - ${price:.2f} (rating: {rating})")
        else:
            print(" No results found.")
    finally:
        # Best-effort cleanup: always attempt teardown once apply has succeeded.
        print("\n--- Run feast teardown ---")
        subprocess.run(["feast", "teardown"])
56+
57+
58+
if __name__ == "__main__":
59+
run_demo()

0 commit comments

Comments
 (0)