danny042 committed on
Commit
81e2592
โ€ข
1 Parent(s): e061915

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -71
app.py CHANGED
@@ -19,17 +19,11 @@ from langchain.memory import StreamlitChatMessageHistory
19
  from gtts import gTTS
20
  from IPython.display import Audio, display
21
 
22
- from io import BytesIO
23
- import speech_recognition as sr
24
-
25
- from langchain.callbacks import get_openai_callback
26
- from langchain.memory import StreamlitChatMessageHistory
27
-
28
  def main():
29
  st.set_page_config(
30
  page_title="์ฐจ๋Ÿ‰์šฉ Q&A ์ฑ—๋ด‡",
31
- page_icon=":car:"
32
- )
33
 
34
  st.title("์ฐจ๋Ÿ‰์šฉ Q&A ์ฑ—๋ด‡ :car:")
35
 
@@ -53,9 +47,9 @@ def main():
53
  st.stop()
54
  files_text = get_text(uploaded_files)
55
  text_chunks = get_text_chunks(files_text)
56
- vector_store = get_vectorstore(text_chunks)
57
 
58
- st.session_state.conversation = get_conversation_chain(vector_store, openai_api_key)
59
 
60
  st.session_state.processComplete = True
61
 
@@ -77,70 +71,32 @@ def main():
77
  st.markdown(query)
78
 
79
  with st.chat_message("assistant"):
80
- # STT ์ฝ”๋“œ ์ถ”๊ฐ€
81
- user_voice_input = stt()
82
- if user_voice_input:
83
- # Add user's voice input to chat history
84
- st.session_state.messages.append({"role": "user", "content": user_voice_input})
85
-
86
- # Continue with the rest of the conversation logic...
87
- chain = st.session_state.conversation
88
-
89
- with st.spinner("Thinking..."):
90
- result = chain({"question": query})
91
- with get_openai_callback() as cb:
92
- st.session_state.chat_history = result['chat_history']
93
- response = result['answer']
94
- source_documents = result['source_documents']
95
-
96
- st.markdown(response)
97
- with st.expander("์ฐธ๊ณ  ๋ฌธ์„œ ํ™•์ธ"):
98
- st.markdown(source_documents[0].metadata['source'], help=source_documents[0].page_content)
99
- st.markdown(source_documents[1].metadata['source'], help=source_documents[1].page_content)
100
- st.markdown(source_documents[2].metadata['source'], help=source_documents[2].page_content)
101
-
102
- # TTS ์ฝ”๋“œ ์ถ”๊ฐ€
103
- tts("์ด๊ฒƒ์€ ์Œ์„ฑ์œผ๋กœ ๋ณ€ํ™˜๋œ ๋‹ต๋ณ€์ž…๋‹ˆ๋‹ค.")
104
-
105
- # Add assistant message to chat history
106
- st.session_state.messages.append({"role": "assistant", "content": response})
107
-
108
- # ...
109
-
110
- # STT ํ•จ์ˆ˜ ์ถ”๊ฐ€
111
def stt():
    """Capture microphone audio and transcribe it to Korean text (STT).

    Listens on the default microphone for up to 5 seconds, then sends the
    captured audio to Google Speech Recognition with ``language="ko-KR"``.

    Returns:
        str | None: The recognized text, or ``None`` when no speech started
        before the timeout, the audio could not be understood, or the
        recognition service was unreachable.
    """
    recognizer = sr.Recognizer()

    with sr.Microphone() as source:
        st.write("๋งํ•ด๋ณด์„ธ์š”...")
        recognizer.adjust_for_ambient_noise(source)
        try:
            # listen() raises WaitTimeoutError when no phrase begins within
            # `timeout` seconds; the original code let that propagate and
            # crash the app instead of degrading gracefully.
            audio = recognizer.listen(source, timeout=5)
        except sr.WaitTimeoutError:
            st.write("์Œ์„ฑ์„ ์ธ์‹ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
            return None

    try:
        text = recognizer.recognize_google(audio, language="ko-KR")
        st.write("์ธ์‹๋œ ํ…์ŠคํŠธ: {}".format(text))
        return text
    except sr.UnknownValueError:
        # Audio was captured but could not be transcribed.
        st.write("์Œ์„ฑ์„ ์ธ์‹ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
        return None
    except sr.RequestError as e:
        # Network / API failure reaching the Google recognition service.
        st.write("Google Speech Recognition ์„œ๋น„์Šค์— ์ ‘๊ทผํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค; {0}".format(e))
        return None
129
-
130
- # TTS ํ•จ์ˆ˜ ์ถ”๊ฐ€
131
- def tts(text):
132
- st.write("์Œ์„ฑ์œผ๋กœ ๋ณ€ํ™˜ ์ค‘...")
133
- tts = gTTS(text=text, lang='ko')
134
- audio_stream = BytesIO()
135
- tts.save(audio_stream)
136
- st.audio(audio_stream, format='audio/wav')
137
 
 
 
 
 
 
138
def tiktoken_len(text):
    """Return the number of cl100k_base tokens that *text* encodes to."""
    encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(text))
142
 
143
-
144
  def get_text(docs):
145
  doc_list = []
146
 
@@ -156,7 +112,7 @@ def get_text(docs):
156
  doc_list.extend(documents)
157
  return doc_list
158
 
159
-
160
  def get_text_chunks(text):
161
  text_splitter = RecursiveCharacterTextSplitter(
162
  chunk_size=1000,
@@ -166,7 +122,7 @@ def get_text_chunks(text):
166
  chunks = text_splitter.split_documents(text)
167
  return chunks
168
 
169
-
170
  def get_vectorstore(text_chunks):
171
  embeddings = HuggingFaceEmbeddings(
172
  model_name="jhgan/ko-sroberta-multitask",
@@ -176,7 +132,7 @@ def get_vectorstore(text_chunks):
176
  vectordb = FAISS.from_documents(text_chunks, embeddings)
177
  return vectordb
178
 
179
-
180
  def get_conversation_chain(vetorestore, openai_api_key):
181
  llm = ChatOpenAI(openai_api_key=openai_api_key, model_name='gpt-3.5-turbo', temperature=0)
182
  conversation_chain = ConversationalRetrievalChain.from_llm(
 
19
  from gtts import gTTS
20
  from IPython.display import Audio, display
21
 
22
+ #์‚ฌ์ดํŠธ ๊ด€๋ จ ํ•จ์ˆ˜
 
 
 
 
 
23
  def main():
24
  st.set_page_config(
25
  page_title="์ฐจ๋Ÿ‰์šฉ Q&A ์ฑ—๋ด‡",
26
+ page_icon=":car:")
 
27
 
28
  st.title("์ฐจ๋Ÿ‰์šฉ Q&A ์ฑ—๋ด‡ :car:")
29
 
 
47
  st.stop()
48
  files_text = get_text(uploaded_files)
49
  text_chunks = get_text_chunks(files_text)
50
+ vetorestore = get_vectorstore(text_chunks)
51
 
52
+ st.session_state.conversation = get_conversation_chain(vetorestore, openai_api_key)
53
 
54
  st.session_state.processComplete = True
55
 
 
71
  st.markdown(query)
72
 
73
  with st.chat_message("assistant"):
74
+ chain = st.session_state.conversation
75
+
76
+ with st.spinner("Thinking..."):
77
+ result = chain({"question": query})
78
+ with get_openai_callback() as cb:
79
+ st.session_state.chat_history = result['chat_history']
80
+ response = result['answer']
81
+ source_documents = result['source_documents']
82
+
83
+ st.markdown(response)
84
+ with st.expander("์ฐธ๊ณ  ๋ฌธ์„œ ํ™•์ธ"):
85
+ st.markdown(source_documents[0].metadata['source'], help=source_documents[0].page_content)
86
+ st.markdown(source_documents[1].metadata['source'], help=source_documents[1].page_content)
87
+ st.markdown(source_documents[2].metadata['source'], help=source_documents[2].page_content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
+ # Add assistant message to chat history
90
+ st.session_state.messages.append({"role": "assistant", "content": response})
91
+
92
+
93
+ #ํ† ํฐํ™” ์‹œํ‚ค๋Š” ๊ณณ
94
def tiktoken_len(text):
    """Count tokens in *text* using the cl100k_base encoding."""
    encoding = tiktoken.get_encoding("cl100k_base")
    token_ids = encoding.encode(text)
    return len(token_ids)
98
 
99
+ #pdfload์ฝ”๋“œ
100
  def get_text(docs):
101
  doc_list = []
102
 
 
112
  doc_list.extend(documents)
113
  return doc_list
114
 
115
+ #textsplitter ์ฝ”๋“œ
116
  def get_text_chunks(text):
117
  text_splitter = RecursiveCharacterTextSplitter(
118
  chunk_size=1000,
 
122
  chunks = text_splitter.split_documents(text)
123
  return chunks
124
 
125
+ #์ž„๋ฒ ๋”ฉ ๋ฐ ๋ฒกํ„ฐ์ €์žฅ ์ฝ”๋“œ
126
  def get_vectorstore(text_chunks):
127
  embeddings = HuggingFaceEmbeddings(
128
  model_name="jhgan/ko-sroberta-multitask",
 
132
  vectordb = FAISS.from_documents(text_chunks, embeddings)
133
  return vectordb
134
 
135
+ #๋ฆฌํŠธ๋ฆฌ๋ฒ„ ๋ฐ llm์ฝ”๋“œ
136
  def get_conversation_chain(vetorestore, openai_api_key):
137
  llm = ChatOpenAI(openai_api_key=openai_api_key, model_name='gpt-3.5-turbo', temperature=0)
138
  conversation_chain = ConversationalRetrievalChain.from_llm(