Daniel Tse committed on
Commit
e7956b2
1 Parent(s): 555a0ea

Implement Summary

Browse files
Files changed (1) hide show
  1. app.py +17 -9
app.py CHANGED
@@ -10,6 +10,7 @@ def transcribe_audio(audiofile):
10
  st.session_state['audio'] = audiofile
11
  print(f"audio_file_session_state:{st.session_state['audio'] }")
12
 
 
13
  #get size of audio file
14
  audio_size = round(os.path.getsize(st.session_state['audio'])/(1024*1024),1)
15
  print(f"audio file size:{audio_size}")
@@ -20,6 +21,7 @@ def transcribe_audio(audiofile):
20
  podcast_duration = podcast.duration_seconds
21
  print(f"Audio Duration: {podcast_duration}")
22
 
 
23
  whisper_model = whisper.load_model("small.en")
24
  transcription = whisper_model.transcribe(audiofile)
25
  st.session_state['transcription'] = transcription
@@ -29,15 +31,9 @@ def transcribe_audio(audiofile):
29
  return transcription
30
 
31
  def summarize_podcast(audiotranscription):
32
- sum_pipe = pipeline("summarization",model="google/flan-t5-base",clean_up_tokenization_spaces=True)
33
- summarized_text = sum_pipe(audiotranscription,
34
- max_length=1000,
35
- min_length=100,
36
- do_sample=False,
37
- early_stopping=True,
38
- num_beams=4)
39
- summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
40
 
 
41
  return summarized_text
42
 
43
 
@@ -55,7 +51,19 @@ st.markdown(
55
 
56
  st.audio("marketplace-2023-06-14.mp3")
57
  if st.button("Process Audio File"):
58
- transcribe_audio("marketplace-2023-06-14.mp3")
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  #audio_file = st.file_uploader("Upload audio copy of file", key="upload", type=['.mp3'])
61
 
 
10
  st.session_state['audio'] = audiofile
11
  print(f"audio_file_session_state:{st.session_state['audio'] }")
12
 
13
+ st.info("Getting size of file")
14
  #get size of audio file
15
  audio_size = round(os.path.getsize(st.session_state['audio'])/(1024*1024),1)
16
  print(f"audio file size:{audio_size}")
 
21
  podcast_duration = podcast.duration_seconds
22
  print(f"Audio Duration: {podcast_duration}")
23
 
24
+ st.info("Transcribing")
25
  whisper_model = whisper.load_model("small.en")
26
  transcription = whisper_model.transcribe(audiofile)
27
  st.session_state['transcription'] = transcription
 
31
  return transcription
32
 
33
  def summarize_podcast(audiotranscription):
34
+ summarizer = pipeline("summarization", model="philschmid/flan-t5-base-samsum", device=0)
 
 
 
 
 
 
 
35
 
36
+ summarized_text = summarizer(audiotranscription)
37
  return summarized_text
38
 
39
 
 
51
 
52
  st.audio("marketplace-2023-06-14.mp3")
53
  if st.button("Process Audio File"):
54
+ podcast_text = transcribe_audio("marketplace-2023-06-14.mp3")
55
+ #write text out
56
+ st.expander("See Transcription"):
57
+ st.caption(podcast_text)
58
+
59
+ #Summarize Text
60
+ podcast_summary = summarize_podcast(podcast_text)
61
+ st.markdown(
62
+ """
63
+ ##Summary of Text
64
+ """
65
+ )
66
+ st.text(podcast_summary)
67
 
68
  #audio_file = st.file_uploader("Upload audio copy of file", key="upload", type=['.mp3'])
69