AkashKhamkar committed on
Commit
521e17f
1 Parent(s): ad19100

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -9
app.py CHANGED
@@ -17,7 +17,6 @@ nltk.download('stopwords')
17
  from PIL import Image
18
  from PIL import ImageDraw
19
  from PIL import ImageFont
20
- import time
21
 
22
 
23
  if not os.path.exists('./transcripts'):
@@ -151,10 +150,9 @@ def clean_text(link,start,end):
151
  return texts
152
 
153
  sf = pd.DataFrame(columns=['Segmented_Text','video_id'])
 
154
  text = segment(transcript.at[0,'text'])
155
-
156
  for i in range(len(text)):
157
- #st.write('iteration no: ',i)
158
  sf.loc[i, 'Segmented_Text'] = text[i]
159
  sf.loc[i, 'video_id'] = transcript.at[0,'video_id']
160
 
@@ -166,7 +164,6 @@ def clean_text(link,start,end):
166
  return texts
167
 
168
  for i in range(len(sf)):
169
- st.write(sf.at[i, 'Segmented_Text'])
170
  sf.loc[i, 'Segmented_Text'] = word_seg(sf.at[i, 'Segmented_Text'])
171
  sf.loc[i, 'Lengths'] = len(tokenizer(sf.at[i, 'Segmented_Text'])['input_ids'])
172
 
@@ -203,11 +200,8 @@ def clean_text(link,start,end):
203
  def t5_summarizer(link,start, end):
204
  input_text = clean_text(link,start,end)
205
  lst_outputs = []
206
- tokenizer1 = AutoTokenizer.from_pretrained("CareerNinja/t5_large_3e-4_on_v2_dataset")
207
- st.write('Loading the model!')
208
- start_time = time.time()
209
- model1 = AutoModelForSeq2SeqLM.from_pretrained("CareerNinja/t5_large_3e-4_on_v2_dataset")
210
- st.write('Model loading compelete, time taken: ',time.time()-start_time)
211
  summarizer1 = pipeline("summarization", model=model1, tokenizer=tokenizer1)
212
  print(f""" Entered summarizer ! """)
213
  st.write('Below is the summary of the given URL: ')
 
17
  from PIL import Image
18
  from PIL import ImageDraw
19
  from PIL import ImageFont
 
20
 
21
 
22
  if not os.path.exists('./transcripts'):
 
150
  return texts
151
 
152
  sf = pd.DataFrame(columns=['Segmented_Text','video_id'])
153
+
154
  text = segment(transcript.at[0,'text'])
 
155
  for i in range(len(text)):
 
156
  sf.loc[i, 'Segmented_Text'] = text[i]
157
  sf.loc[i, 'video_id'] = transcript.at[0,'video_id']
158
 
 
164
  return texts
165
 
166
  for i in range(len(sf)):
 
167
  sf.loc[i, 'Segmented_Text'] = word_seg(sf.at[i, 'Segmented_Text'])
168
  sf.loc[i, 'Lengths'] = len(tokenizer(sf.at[i, 'Segmented_Text'])['input_ids'])
169
 
 
200
  def t5_summarizer(link,start, end):
201
  input_text = clean_text(link,start,end)
202
  lst_outputs = []
203
+ tokenizer1 = AutoTokenizer.from_pretrained("CareerNinja/t5-large_3e-4")
204
+ model1 = AutoModelForSeq2SeqLM.from_pretrained("CareerNinja/t5-large_3e-4")
 
 
 
205
  summarizer1 = pipeline("summarization", model=model1, tokenizer=tokenizer1)
206
  print(f""" Entered summarizer ! """)
207
  st.write('Below is the summary of the given URL: ')