ViXuan committed on
Commit 94a2f2d
1 Parent(s): 0ef3a2a

Working Questgen implemented

Files changed (3):
  1. .gitignore +3 -1
  2. app.py +54 -19
  3. requirements.txt +4 -1
.gitignore CHANGED
@@ -1,4 +1,6 @@
 venv
 .vscode
 s2v_reddit_2015_md.tar.gz
-__pycache__
+__pycache__
+s2v_old
+._s2v_old
app.py CHANGED
@@ -1,3 +1,5 @@
+import pke
+from sense2vec import Sense2Vec
 import time
 import gradio as gr
 from transformers import AutoTokenizer
@@ -5,6 +7,19 @@ import os
 from pathlib import Path
 from FastT5 import get_onnx_runtime_sessions, OnnxT5
 
+commands = [
+    "curl -LO https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz",
+    "tar -xvf s2v_reddit_2015_md.tar.gz",
+]
+
+for command in commands:
+    return_code = os.system(command)
+    if return_code == 0:
+        print(f"Command '{command}' executed successfully")
+    else:
+        print(f"Command '{command}' failed with return code {return_code}")
+
+s2v = Sense2Vec().from_disk("s2v_old")
 
 trained_model_path = './t5_squad_v1/'
 
@@ -42,25 +57,10 @@ def get_question(sentence, answer, mdl, tknizer):
     dec = [tknizer.decode(ids, skip_special_tokens=True) for ids in outs]
 
     Question = dec[0].replace("question:", "")
-    Ouestion = Question.strip()
+    Question = Question.strip()
     return Question
 
 
-# context = "Ramsri loves to watch cricket during his free time"
-# answer = "cricket"
-context = "Donald Trump is an American media personality and businessman who served as the 45th president of the United States."
-answer = "Donald Trump"
-ques = get_question(context, answer, model, tokenizer)
-print("question: ", ques)
-
-
-context = gr.components.Textbox(
-    lines=5, placeholder="Enter paragraph/context here...")
-answer = gr.components.Textbox(
-    lines=3, placeholder="Enter answer/keyword here...")
-question = gr.components.Textbox(type="text", label="Question")
-
-
 def generate_question(context, answer):
     start_time = time.time()  # Record the start time
     result = get_question(context, answer, model, tokenizer)
@@ -70,10 +70,45 @@ def generate_question(context, answer):
     return result
 
 
+def generate_mcq(context):
+    extractor = pke.unsupervised.TopicRank()
+    extractor.load_document(input=context, language='en')
+    extractor.candidate_selection(pos={"NOUN", "PROPN", "ADJ"})
+    extractor.candidate_weighting()
+    keyphrases = extractor.get_n_best(n=10)
+
+    results = []
+
+    for keyword, _ in keyphrases:
+        original_keyword = keyword
+        keyword = original_keyword.lower().replace(" ", "_")
+        sense = s2v.get_best_sense(keyword)
+
+        if sense is not None:
+            most_similar = s2v.most_similar(sense, n=2)
+            distractors = [word.split("|")[0].lower().replace(
+                "_", " ") for word, _ in most_similar]
+
+            question = generate_question(context, original_keyword)
+
+            result = {
+                "Question": question,
+                "Keyword": original_keyword,
+                "Distractor1": distractors[0],
+                "Distractor2": distractors[1]
+            }
+
+            results.append(result)
+
+    return results
+
+
 iface = gr.Interface(
-    fn=generate_question,
-    inputs=[context, answer],
-    outputs=question
+    fn=generate_mcq,
+    inputs=gr.Textbox(label="Context", type='text'),
+    outputs=gr.JSON(value=list),
+    title="Questgen AI",
+    description="Enter a context to generate MCQs for keywords."
 )
 
 iface.launch()
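
The new `generate_mcq` entry point can be smoke-tested before the Gradio UI takes over. Below is a minimal sketch that is not part of the commit; it assumes it is placed in app.py just above `iface.launch()`, after `model`, `tokenizer`, and `s2v` have been loaded as in the diff above, and it reuses the sample context that the removed demo code printed a question for.

```python
# Hypothetical smoke test (not in the commit): place above iface.launch() in app.py.
# Relies on generate_mcq() returning a list of dicts with the keys shown in the diff.
sample_context = (
    "Donald Trump is an American media personality and businessman who "
    "served as the 45th president of the United States."
)

for mcq in generate_mcq(sample_context):
    print(mcq["Question"])
    print("  answer:     ", mcq["Keyword"])
    print("  distractors:", mcq["Distractor1"], "/", mcq["Distractor2"])
```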
requirements.txt CHANGED
@@ -5,4 +5,7 @@ torch
 transformers
 sentencepiece
 progress
-psutil
+psutil
+sense2vec
+git+https://github.com/boudinfl/pke.git
+en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0-py3-none-any.whl
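
After `pip install -r requirements.txt`, a short import check can confirm that the packages added in this commit resolved correctly. This is a sketch, not part of the commit; spaCy itself is not listed explicitly but is pulled in transitively by pke and the pinned en_core_web_sm wheel.

```python
# Hypothetical dependency check (not in the commit).
import pke        # installed from the boudinfl/pke git URL
import sense2vec  # provides the Sense2Vec vectors used for distractors
import spacy      # arrives as a dependency of pke / en_core_web_sm

nlp = spacy.load("en_core_web_sm")  # wheel pinned in requirements.txt
print("Dependencies importable; spaCy pipeline components:", nlp.pipe_names)
```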