Daniel Tse committed on
Commit
4fa56af
1 Parent(s): 4bce6ba

Remove max_len and min_len

Browse files
Files changed (1) hide show
  1. app.py +12 -2
app.py CHANGED
@@ -9,6 +9,9 @@ from nltk import sent_tokenize
9
  nltk.download('punkt')
10
 
11
 
 
 
 
12
  def transcribe_audio(audiofile):
13
 
14
  st.session_state['audio'] = audiofile
@@ -73,10 +76,17 @@ def summarize_podcast(audiotranscription):
73
  st.info("Chunking text")
74
  text_chunks = chunk_and_preprocess_text(audiotranscription)
75
 
76
- summarized_text = summarizer(text_chunks, max_len=200,min_len=50)
 
77
  st.session_state['summary'] = summarized_text
78
  return summarized_text
79
-
 
 
 
 
 
 
80
 
81
  st.markdown("# Podcast Q&A")
82
 
 
9
  nltk.download('punkt')
10
 
11
 
12
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
13
+
14
+
15
  def transcribe_audio(audiofile):
16
 
17
  st.session_state['audio'] = audiofile
 
76
  st.info("Chunking text")
77
  text_chunks = chunk_and_preprocess_text(audiotranscription)
78
 
79
+ #summarized_text = summarizer(text_chunks, max_len=200,min_len=50)
80
+ summarized_text = summarizer(text_chunks)
81
  st.session_state['summary'] = summarized_text
82
  return summarized_text
83
+
84
+ def prepare_text_for_qa(audiotranscription):
85
+
86
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=20)
87
+ documents = text_splitter.split_documents(audiotranscription)
88
+ revalue = ""
89
+ return revalue
90
 
91
  st.markdown("# Podcast Q&A")
92