Skip to content

Commit 2fc9594

Browse files
committed
Example of other data
1 parent c0acde0 commit 2fc9594

File tree

10 files changed

+199322
-233460
lines changed

10 files changed

+199322
-233460
lines changed

locustfile.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@ def ask_question(self):
1717
"messages": [
1818
{
1919
"content": random.choice(
20-
["Best shoe for hiking?", "Climbing shoe cheaper than $30?", "Waterproof camping gear?"]
20+
[
21+
"Summarize GitHub services shown in Python sessions",
22+
"Livestreams about Copilot",
23+
"In-person sessions about GitHub Actions",
24+
]
2125
),
2226
"role": "user",
2327
}

src/backend/fastapi_app/api_models.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ class ThoughtStep(BaseModel):
4747

4848

4949
class RAGContext(BaseModel):
50-
data_points: dict[int, dict[str, Any]]
50+
data_points: dict[str, dict[str, Any]]
5151
thoughts: list[ThoughtStep]
5252
followup_questions: list[str] | None = None
5353

@@ -65,12 +65,15 @@ class RetrievalResponseDelta(BaseModel):
6565

6666

6767
class ItemPublic(BaseModel):
68-
id: int
69-
type: str
70-
brand: str
71-
name: str
68+
# This should match postgres_models.py
69+
id: str
70+
title: str
7271
description: str
73-
price: float
72+
speakers: list[str]
73+
tracks: list[str]
74+
day: str
75+
time: str
76+
mode: str
7477

7578

7679
class ItemWithDistance(ItemPublic):

src/backend/fastapi_app/postgres_models.py

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,34 @@
11
from __future__ import annotations
22

3-
from dataclasses import asdict
4-
53
from pgvector.sqlalchemy import Vector
6-
from sqlalchemy import Index
7-
from sqlalchemy.orm import DeclarativeBase, Mapped, MappedAsDataclass, mapped_column
4+
from sqlalchemy import Index, String
5+
from sqlalchemy.dialects.postgresql import ARRAY
6+
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
87

98

109
# Define the models
11-
class Base(DeclarativeBase, MappedAsDataclass):
10+
class Base(DeclarativeBase):
1211
pass
1312

1413

1514
class Item(Base):
16-
__tablename__ = "items"
17-
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
18-
type: Mapped[str] = mapped_column()
19-
brand: Mapped[str] = mapped_column()
20-
name: Mapped[str] = mapped_column()
15+
__tablename__ = "sessions"
16+
# An ID column should always be defined, but it can be int or string
17+
id: Mapped[str] = mapped_column(primary_key=True)
18+
# Schema specific:
19+
title: Mapped[str] = mapped_column()
2120
description: Mapped[str] = mapped_column()
22-
price: Mapped[float] = mapped_column()
23-
embedding_ada002: Mapped[Vector] = mapped_column(Vector(1536)) # ada-002
24-
embedding_nomic: Mapped[Vector] = mapped_column(Vector(768)) # nomic-embed-text
21+
speakers: Mapped[list[str]] = mapped_column(ARRAY(String))
22+
tracks: Mapped[list[str]] = mapped_column(ARRAY(String))
23+
day: Mapped[str] = mapped_column()
24+
time: Mapped[str] = mapped_column()
25+
mode: Mapped[str] = mapped_column()
26+
# Embeddings for different models:
27+
embedding_ada002: Mapped[Vector] = mapped_column(Vector(1536), nullable=True) # ada-002
28+
embedding_nomic: Mapped[Vector] = mapped_column(Vector(768), nullable=True) # nomic-embed-text
2529

2630
def to_dict(self, include_embedding: bool = False):
27-
model_dict = asdict(self)
31+
model_dict = {column.name: getattr(self, column.name) for column in self.__table__.columns}
2832
if include_embedding:
2933
model_dict["embedding_ada002"] = model_dict.get("embedding_ada002", [])
3034
model_dict["embedding_nomic"] = model_dict.get("embedding_nomic", [])
@@ -34,23 +38,24 @@ def to_dict(self, include_embedding: bool = False):
3438
return model_dict
3539

3640
def to_str_for_rag(self):
37-
return f"Name:{self.name} Description:{self.description} Price:{self.price} Brand:{self.brand} Type:{self.type}"
41+
return f"Title:{self.title} Description:{self.description} Speakers:{self.speakers} Tracks:{self.tracks} Day:{self.day} Time:{self.time} Mode:{self.mode}" # noqa
3842

3943
def to_str_for_embedding(self):
40-
return f"Name: {self.name} Description: {self.description} Type: {self.type}"
44+
return f"Name: {self.title} Description: {self.description} Tracks: {self.tracks} Day: {self.day} Mode: {self.mode}" # noqa
4145

4246

4347
# Define HNSW index to support vector similarity search through the vector_ip_ops access method (inner product).
4448
index_ada002 = Index(
45-
"hnsw_index_for_innerproduct_item_embedding_ada002",
49+
# TODO: generate based off table name
50+
"hnsw_index_for_innerproduct_session_embedding_ada002",
4651
Item.embedding_ada002,
4752
postgresql_using="hnsw",
4853
postgresql_with={"m": 16, "ef_construction": 64},
4954
postgresql_ops={"embedding_ada002": "vector_ip_ops"},
5055
)
5156

5257
index_nomic = Index(
53-
"hnsw_index_for_innerproduct_item_embedding_nomic",
58+
"hnsw_index_for_innerproduct_session_embedding_nomic",
5459
Item.embedding_nomic,
5560
postgresql_using="hnsw",
5661
postgresql_with={"m": 16, "ef_construction": 64},
Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
Assistant helps customers with questions about products.
2-
Respond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.
3-
Answer ONLY with the product details listed in the products.
1+
Assistant helps customers with questions about GitHub Universe conference sessions.
2+
Respond as if you are a helpful chatbot helping a user on a conference website, in a personable and friendly tone.
3+
Answer ONLY based on the provided sources.
44
If there isn't enough information below, say you don't know.
55
Do not generate answers that don't use the sources below.
6-
Each product has an ID in brackets followed by colon and the product details.
7-
Always include the product ID for each product you use in the response.
8-
Use square brackets to reference the source, for example [52].
9-
Don't combine citations, list each product separately, for example [27][51].
6+
Each session has an ID in brackets followed by colon and the session details.
7+
At the end of your answer, include any referenced session IDs in square brackets,
8+
for example [session-1724855655436001ip2k-1724879694174001ZaSd].
Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching database rows.
2-
You have access to an Azure PostgreSQL database with an items table that has columns for title, description, brand, price, and type.
2+
You have access to an Azure PostgreSQL database with a table about GitHub Universe conference sessions,
3+
that has columns for title, description, tracks, speakers, day, time, and mode.
34
Generate a search query based on the conversation and the new question.
45
If the question is not in English, translate the question to English before generating the search query.
56
If you cannot generate a search query, return the original user question.
6-
DO NOT return anything besides the query.
7+
DO NOT return anything besides the query.
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,34 @@
11
[
2-
{"role": "user", "content": "good options for climbing gear that can be used outside?"},
2+
{"role": "user", "content": "any sessions about python for AI apps?"},
33
{"role": "assistant", "tool_calls": [
44
{
55
"id": "call_abc123",
66
"type": "function",
77
"function": {
8-
"arguments": "{\"search_query\":\"climbing gear outside\"}",
8+
"arguments": "{\"search_query\":\"python AI\"}",
99
"name": "search_database"
1010
}
1111
}
1212
]},
1313
{
1414
"role": "tool",
1515
"tool_call_id": "call_abc123",
16-
"content": "Search results for climbing gear that can be used outside: ..."
16+
"content": "Search results for Python AI app sessions: ..."
1717
},
18-
{"role": "user", "content": "are there any shoes less than $50?"},
18+
{"role": "user", "content": "are there any recorded sessions about Java?"},
1919
{"role": "assistant", "tool_calls": [
2020
{
2121
"id": "call_abc456",
2222
"type": "function",
2323
"function": {
24-
"arguments": "{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}",
24+
"arguments": "{\"search_query\":\"Java\",\"mode_filter\":{\"comparison_operator\":\"=\",\"value\":\"Recorded\"}}",
2525
"name": "search_database"
2626
}
2727
}
2828
]},
2929
{
3030
"role": "tool",
3131
"tool_call_id": "call_abc456",
32-
"content": "Search results for shoes cheaper than 50: ..."
32+
"content": "Search results for Java sessions: ..."
3333
}
3434
]

src/backend/fastapi_app/query_rewriter.py

Lines changed: 10 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -12,39 +12,25 @@ def build_search_function() -> list[ChatCompletionToolParam]:
1212
"type": "function",
1313
"function": {
1414
"name": "search_database",
15-
"description": "Search PostgreSQL database for relevant products based on user query",
15+
"description": "Search PostgreSQL database for relevant conference sessions based on user query",
1616
"parameters": {
1717
"type": "object",
1818
"properties": {
1919
"search_query": {
2020
"type": "string",
21-
"description": "Query string to use for full text search, e.g. 'red shoes'",
21+
"description": "Query string to use for full text search, e.g. 'python AI'",
2222
},
23-
"price_filter": {
23+
"mode_filter": {
2424
"type": "object",
25-
"description": "Filter search results based on price of the product",
26-
"properties": {
27-
"comparison_operator": {
28-
"type": "string",
29-
"description": "Operator to compare the column value, either '>', '<', '>=', '<=', '='", # noqa
30-
},
31-
"value": {
32-
"type": "number",
33-
"description": "Value to compare against, e.g. 30",
34-
},
35-
},
36-
},
37-
"brand_filter": {
38-
"type": "object",
39-
"description": "Filter search results based on brand of the product",
25+
"description": "Filter search results based on mode of the session",
4026
"properties": {
4127
"comparison_operator": {
4228
"type": "string",
4329
"description": "Operator to compare the column value, either '=' or '!='",
4430
},
4531
"value": {
4632
"type": "string",
47-
"description": "Value to compare against, e.g. AirStrider",
33+
"description": "Possible values are 'In-person', 'Livestream', or 'Recorded'.",
4834
},
4935
},
5036
},
@@ -69,22 +55,13 @@ def extract_search_arguments(original_user_query: str, chat_completion: ChatComp
6955
arg = json.loads(function.arguments)
7056
# Even though it's required, search_query is not always specified
7157
search_query = arg.get("search_query", original_user_query)
72-
if "price_filter" in arg and arg["price_filter"]:
73-
price_filter = arg["price_filter"]
74-
filters.append(
75-
{
76-
"column": "price",
77-
"comparison_operator": price_filter["comparison_operator"],
78-
"value": price_filter["value"],
79-
}
80-
)
81-
if "brand_filter" in arg and arg["brand_filter"]:
82-
brand_filter = arg["brand_filter"]
58+
if "mode_filter" in arg and arg["mode_filter"]:
59+
mode_filter = arg["mode_filter"]
8360
filters.append(
8461
{
85-
"column": "brand",
86-
"comparison_operator": brand_filter["comparison_operator"],
87-
"value": brand_filter["value"],
62+
"column": "mode",
63+
"comparison_operator": mode_filter["comparison_operator"],
64+
"value": mode_filter["value"],
8865
}
8966
)
9067
elif query_text := response_message.content:

0 commit comments

Comments
 (0)