Daniel Tse committed on
Commit
e3d61f6
1 Parent(s): c274bf0

Set max_new_tokens.

Browse files

Change summarization model to use Google

Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -28,6 +28,7 @@ def transcribe_audio(audiofile):
28
  model="openai/whisper-medium",
29
  chunk_length_s=30,
30
  device=device,
 
31
  )
32
 
33
  transcription = pipe(audiofile, batch_size=8)["text"]
@@ -39,10 +40,16 @@ def transcribe_audio(audiofile):
39
  return transcription
40
 
41
  def summarize_podcast(audiotranscription):
42
- sum_pipe = pipeline("summarization",model="philschmid/flan-t5-base-samsum",clean_up_tokenization_spaces=True)
43
- summary = ""
44
-
45
- return summary
 
 
 
 
 
 
46
 
47
 
48
  st.markdown("# Podcast Q&A")
 
28
  model="openai/whisper-medium",
29
  chunk_length_s=30,
30
  device=device,
31
+ max_new_tokens=60,
32
  )
33
 
34
  transcription = pipe(audiofile, batch_size=8)["text"]
 
40
  return transcription
41
 
42
def summarize_podcast(audiotranscription):
    """Summarize a podcast transcript with a FLAN-T5 summarization pipeline.

    Parameters
    ----------
    audiotranscription : str
        Full transcript text produced by the transcription step.

    Returns
    -------
    str
        The generated summary text; if the pipeline returns several
        pieces, they are joined with single spaces.
    """
    # Beam-search summarizer; clean_up_tokenization_spaces tidies the
    # detokenized output string.
    summarizer = pipeline(
        "summarization",
        model="google/flan-t5-base",
        clean_up_tokenization_spaces=True,
    )
    # NOTE(review): flan-t5-base has a bounded input length — very long
    # transcripts are presumably truncated by the pipeline; confirm.
    pieces = summarizer(
        audiotranscription,
        max_length=1000,
        min_length=100,
        do_sample=False,        # deterministic decoding (no sampling)
        early_stopping=True,
        num_beams=4,
    )
    # The pipeline returns a list of dicts keyed by 'summary_text';
    # stitch the pieces into one string.
    return ' '.join(piece['summary_text'] for piece in pieces)
53
 
54
 
55
  st.markdown("# Podcast Q&A")