PanoEvJ committed
Commit cd5daea
1 Parent(s): eafb54e

copied all files

Files changed (5)
  1. .gitignore +4 -0
  2. Dockerfile +11 -0
  3. README.md +6 -5
  4. app.py +211 -0
  5. requirements.txt +9 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
+ .env
+ __pycache__
+ .cache
+ .chainlit
Dockerfile ADDED
@@ -0,0 +1,11 @@
+ FROM python:3.9
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+ WORKDIR $HOME/app
+ COPY --chown=user . $HOME/app
+ COPY ./requirements.txt $HOME/app/requirements.txt
+ RUN pip install -r requirements.txt
+ COPY --chown=user . .
+ CMD ["chainlit", "run", "app.py", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,12 @@
  ---
- title: NBA Assist
- emoji: 🦀
- colorFrom: pink
- colorTo: gray
+ title: Barbenheimer RAQA Application Chainlit Demo
+ emoji: 🔥
+ colorFrom: red
+ colorTo: red
  sdk: docker
  pinned: false
- license: bigscience-openrail-m
+ license: apache-2.0
+ duplicated_from: ai-maker-space/Barbie-RAQA-Application-Chainlit-Demo
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,211 @@
+ import os
+ import openai
+ import chainlit as cl
+ import pandas as pd
+ import chromadb
+
+ from chainlit import user_session
+ from sqlalchemy import create_engine
+ from typing import List, Tuple, Any
+ from pydantic import BaseModel, Field
+ from llama_index import Document
+ from llama_index import SQLDatabase
+ from llama_index.agent import OpenAIAgent
+ from llama_index.tools.query_engine import QueryEngineTool
+ from llama_index.indices.struct_store.sql_query import NLSQLTableQueryEngine
+ from llama_index import ServiceContext
+ from llama_index.llms import OpenAI
+ from llama_index.embeddings.openai import OpenAIEmbedding
+ from llama_index import VectorStoreIndex
+ from llama_index.vector_stores import ChromaVectorStore
+ from llama_index.storage.storage_context import StorageContext
+ from llama_index.tools import FunctionTool
+ from llama_index.retrievers import VectorIndexRetriever
+ from llama_index.query_engine import RetrieverQueryEngine
+ from llama_index.vector_stores.types import (
+     VectorStoreInfo,
+     MetadataInfo,
+     ExactMatchFilter,
+     MetadataFilters,
+ )
+
+ openai.api_key = os.environ["OPENAI_API_KEY"]
+
+ # preparation
+ def get_df_from_workbook(sheet_name,
+                          workbook_id='1MB1ZsQul4AB262AsaY4fHtGW4HWp2-56zB-E5xTbs2A'):
+     url = f'https://docs.google.com/spreadsheets/d/{workbook_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}'
+     return pd.read_csv(url)
+
+ docEmailSample = Document(
+     text="Hey KD, let's grab dinner after our next game, Steph",
+     metadata={'from_to': 'Stephen Curry to Kevin Durant'},
+ )
+ docEmailSample2 = Document(
+     text="Yo Joker, you were a monster last year, can't wait to play against you in the opener! Draymond",
+     metadata={'from_to': 'Draymond Green to Nikola Jokic'},
+ )
+ docAdditionalSamples = [docEmailSample, docEmailSample2]
+
+ class AutoRetrieveModel(BaseModel):
+     query: str = Field(..., description="natural language query string")
+     filter_key_list: List[str] = Field(
+         ..., description="List of metadata filter field names"
+     )
+     filter_value_list: List[str] = Field(
+         ...,
+         description=(
+             "List of metadata filter field values (corresponding to names specified in filter_key_list)"
+         )
+     )
+
+ def auto_retrieve_fn(
+     query: str, filter_key_list: List[str], filter_value_list: List[str]
+ ):
+     """Auto retrieval function.
+
+     Performs auto-retrieval from a vector database, and then applies a set of filters.
+
+     """
+     query = query or "Query"
+
+     # for i, (k, v) in enumerate(zip(filter_key_list, filter_value_list)):
+     #     if k == 'token_list':
+     #         if token not in v:
+     #             v = ''
+
+     exact_match_filters = [
+         ExactMatchFilter(key=k, value=v)
+         for k, v in zip(filter_key_list, filter_value_list)
+     ]
+     retriever = VectorIndexRetriever(
+         vector_index, filters=MetadataFilters(filters=exact_match_filters), similarity_top_k=top_k
+     )
+     # query_engine = vector_index.as_query_engine(filters=MetadataFilters(filters=exact_match_filters))
+     query_engine = RetrieverQueryEngine.from_args(retriever)
+
+     response = query_engine.query(query)
+     return str(response)
+
+ # loading CSV data
+ sheet_names = ['Teams', 'Players', 'Schedule', 'Player_Stats']
+ dict_of_dfs = {sheet: get_df_from_workbook(sheet) for sheet in sheet_names}
+
+ engine = create_engine("sqlite+pysqlite:///:memory:")
+
+ for sheet_name in dict_of_dfs:
+     dict_of_dfs[sheet_name].to_sql(sheet_name, con=engine)
+
+ sql_database = SQLDatabase(
+     engine,
+     include_tables=list(dict_of_dfs.keys())
+ )
+
+ # setting up llm & service context
+ embed_model = OpenAIEmbedding()
+ chunk_size = 1000
+ llm = OpenAI(
+     temperature=0,
+     model="gpt-3.5-turbo",
+     streaming=True
+ )
+ service_context = ServiceContext.from_defaults(
+     llm=llm,
+     chunk_size=chunk_size,
+     embed_model=embed_model
+ )
+
+ # setting up vector store
+ chroma_client = chromadb.Client()
+ chroma_collection = chroma_client.create_collection("all_data")
+ vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
+ storage_context = StorageContext.from_defaults(vector_store=vector_store)
+ vector_index = VectorStoreIndex([], storage_context=storage_context, service_context=service_context)
+
+ vector_index.insert_nodes(docAdditionalSamples)
+
+ # setting up metadata
+ top_k = 3
+ info_emails_players = VectorStoreInfo(
+     content_info="emails exchanged between NBA players",
+     metadata_info=[
+         MetadataInfo(
+             name="from_to",
+             type="str",
+             description="""
+             email sent by a player of the Golden State Warriors to any other NBA player, one of [
+                 Stephen Curry to any NBA player,
+                 Klay Thompson to any NBA player,
+                 Chris Paul to any NBA player,
+                 Andrew Wiggins to any NBA player,
+                 Draymond Green to any NBA player,
+                 Gary Payton II to any NBA player,
+                 Kevon Looney to any NBA player,
+                 Jonathan Kuminga to any NBA player,
+                 Moses Moody to any NBA player,
+                 Brandin Podziemski to any NBA player,
+                 Cory Joseph to any NBA player,
+                 Dario Šarić to any NBA player]"""
+         ),
+     ]
+ )
+
+ @cl.on_chat_start
+ def main():
+
+     sql_query_engine = NLSQLTableQueryEngine(
+         sql_database=sql_database,
+         tables=list(dict_of_dfs.keys())
+     )
+
+     sql_nba_tool = QueryEngineTool.from_defaults(
+         query_engine=sql_query_engine,
+         name='sql_nba_tool',
+         description=("""Useful for translating a natural language query into a SQL query over tables containing:
+             1. teams, containing information related to all NBA teams
+             2. players, containing information about the team that each player plays for
+             3. schedule, containing information related to the entire NBA game schedule
+             4. player_stats, containing information related to all NBA player stats
+             """
+         ),
+     )
+
+     description_emails = f"""\
+     Use this tool to look up information about emails exchanged between players of the Golden State Warriors and any other NBA player.
+     The vector database schema is given below:
+     {info_emails_players.json()}
+     """
+     auto_retrieve_tool_emails = FunctionTool.from_defaults(
+         fn=auto_retrieve_fn,
+         name='auto_retrieve_tool_emails',
+         description=description_emails,
+         fn_schema=AutoRetrieveModel
+     )
+
+     agent = OpenAIAgent.from_tools(
+     # agent = ReActAgent.from_tools(
+         tools=[
+             sql_nba_tool,
+             auto_retrieve_tool_emails,
+         ],
+         llm=llm,
+         verbose=True,
+     )
+
+     cl.user_session.set("agent", agent)
+
+ @cl.on_message
+ async def main(message):
+     agent = cl.user_session.get("agent")
+
+     # chainlit passes a str in older versions and a cl.Message in newer ones
+     content = message.content if hasattr(message, "content") else message
+     response = agent.chat(content)
+
+     response_message = cl.Message(content="")
+
+     # for token in response.response:
+     #     await response_message.stream_token(token=token)
+
+     if response.response:
+         response_message.content = response.response
+
+     await response_message.send()
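
Note on the data loading above: get_df_from_workbook depends on the workbook staying publicly readable, and a sheet name containing spaces would need URL-encoding, which the committed version skips. A minimal sketch of a hardened variant (only urllib.parse.quote is added; get_df_from_workbook_quoted is an illustrative name, not part of the commit):

from urllib.parse import quote

import pandas as pd

def get_df_from_workbook_quoted(sheet_name,
                                workbook_id='1MB1ZsQul4AB262AsaY4fHtGW4HWp2-56zB-E5xTbs2A'):
    # Same Google Sheets CSV-export endpoint as app.py; quote() keeps
    # sheet names with spaces from producing a malformed URL.
    url = (f'https://docs.google.com/spreadsheets/d/{workbook_id}'
           f'/gviz/tq?tqx=out:csv&sheet={quote(sheet_name)}')
    return pd.read_csv(url)

players_df = get_df_from_workbook_quoted('Players')  # works only while the sheet is public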
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ openai==0.27.8
+ llama-index==0.8.6
+ nltk==3.8.1
+ chromadb==0.4.6
+ tiktoken==0.4.0
+ sentence-transformers==2.2.2
+ pydantic==1.10.11
+ chainlit
+ pandas
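
To verify the tool-calling wiring without the NBA data, here is a minimal, self-contained sketch of the same FunctionTool + fn_schema + OpenAIAgent pattern app.py uses (assumes the pinned llama-index==0.8.6 and pydantic==1.10.11 above, plus OPENAI_API_KEY in the environment; multiply is a toy stand-in for auto_retrieve_fn):

from pydantic import BaseModel, Field
from llama_index.agent import OpenAIAgent
from llama_index.llms import OpenAI
from llama_index.tools import FunctionTool

class MultiplyModel(BaseModel):
    a: int = Field(..., description="first factor")
    b: int = Field(..., description="second factor")

def multiply(a: int, b: int) -> str:
    # The agent fills a and b from the user's message via fn_schema.
    return str(a * b)

multiply_tool = FunctionTool.from_defaults(
    fn=multiply,
    name="multiply_tool",
    description="Multiplies two integers.",
    fn_schema=MultiplyModel,
)

agent = OpenAIAgent.from_tools(
    tools=[multiply_tool],
    llm=OpenAI(temperature=0, model="gpt-3.5-turbo"),
    verbose=True,
)
print(agent.chat("What is 6 times 7?"))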