Daryl Fung committed on
Commit
721c493
1 Parent(s): f930094

fix keyword

Browse files
Files changed (3) hide show
  1. app.py +3 -3
  2. keyword_extraction.py +3 -3
  3. main.py +3 -3
app.py CHANGED
@@ -36,9 +36,9 @@ def greet(name, descriptions):
36
 
37
  # run keyword extraction
38
  kw_model = KeyBERT()
39
- keyword_extract(kw_model, 1, save_output=f'results/{name}/{1}_keyword.png')
40
- keyword_extract(kw_model, 2, save_output=f'results/{name}/{2}_keyword.png')
41
- keyword_extract(kw_model, 3, save_output=f'results/{name}/{3}_keyword.png')
42
  outputs.append(Image.open(f'results/{name}/1_keyword.png'))
43
  outputs.append(Image.open(f'results/{name}/2_keyword.png'))
44
  outputs.append(Image.open(f'results/{name}/3_keyword.png'))
 
36
 
37
  # run keyword extraction
38
  kw_model = KeyBERT()
39
+ keyword_extract(descriptions, kw_model, 1, save_output=f'results/{name}/{1}_keyword.png')
40
+ keyword_extract(descriptions, kw_model, 2, save_output=f'results/{name}/{2}_keyword.png')
41
+ keyword_extract(descriptions, kw_model, 3, save_output=f'results/{name}/{3}_keyword.png')
42
  outputs.append(Image.open(f'results/{name}/1_keyword.png'))
43
  outputs.append(Image.open(f'results/{name}/2_keyword.png'))
44
  outputs.append(Image.open(f'results/{name}/3_keyword.png'))
keyword_extraction.py CHANGED
@@ -3,7 +3,7 @@ import matplotlib.pyplot as plt
3
  import pandas as pd
4
  import seaborn as sns
5
 
6
- doc = """
7
  Database that collects, administrative, clinical and demographic information on hospital discharges (including deaths, sign-outs and transfers). Some provinces and territories also use the DAD to capture day surgery.
8
 
9
 
@@ -31,7 +31,7 @@ The Discharge Abstract Database is a database for information on all separation
31
  Captures administrative, clinical and demographic information on discharges for acute care facilities (including deaths, sign-outs and transfers).
32
  """
33
 
34
- def keyword_extract(kw_model, n_grams, save_output='results/'):
35
  keyword_onegram = kw_model.extract_keywords(doc, keyphrase_ngram_range=(1, n_grams), stop_words=None)
36
  words = list(zip(*keyword_onegram))[0]
37
  scores = list(zip(*keyword_onegram))[1]
@@ -46,5 +46,5 @@ if __name__ == '__main__':
46
  keyword_extract(kw_model, 1)
47
  keyword_extract(kw_model, 2)
48
  keyword_extract(kw_model, 3)
49
- keywords = kw_model.extract_keywords(doc, highlight=True)
50
  print(keywords)
 
3
  import pandas as pd
4
  import seaborn as sns
5
 
6
+ test_doc = """
7
  Database that collects, administrative, clinical and demographic information on hospital discharges (including deaths, sign-outs and transfers). Some provinces and territories also use the DAD to capture day surgery.
8
 
9
 
 
31
  Captures administrative, clinical and demographic information on discharges for acute care facilities (including deaths, sign-outs and transfers).
32
  """
33
 
34
+ def keyword_extract(doc, kw_model, n_grams, save_output='results/'):
35
  keyword_onegram = kw_model.extract_keywords(doc, keyphrase_ngram_range=(1, n_grams), stop_words=None)
36
  words = list(zip(*keyword_onegram))[0]
37
  scores = list(zip(*keyword_onegram))[1]
 
46
  keyword_extract(kw_model, 1)
47
  keyword_extract(kw_model, 2)
48
  keyword_extract(kw_model, 3)
49
+ keywords = kw_model.extract_keywords(test_doc, highlight=True)
50
  print(keywords)
main.py CHANGED
@@ -71,9 +71,9 @@ filename.open('w', encoding='utf-8').write(svg)
71
 
72
  # run keyword extraction
73
  kw_model = KeyBERT()
74
- keyword_extract(kw_model, 1, save_output=f'results/DAD/{1}_keyword.png')
75
- keyword_extract(kw_model, 2, save_output=f'results/DAD/{2}_keyword.png')
76
- keyword_extract(kw_model, 3, save_output=f'results/DAD/{3}_keyword.png')
77
  keywords = kw_model.extract_keywords(doc, highlight=True)
78
  print(keywords)
79
 
 
71
 
72
  # run keyword extraction
73
  kw_model = KeyBERT()
74
+ keyword_extract(descriptions, kw_model, 1, save_output=f'results/DAD/{1}_keyword.png')
75
+ keyword_extract(descriptions, kw_model, 2, save_output=f'results/DAD/{2}_keyword.png')
76
+ keyword_extract(descriptions, kw_model, 3, save_output=f'results/DAD/{3}_keyword.png')
77
  keywords = kw_model.extract_keywords(doc, highlight=True)
78
  print(keywords)
79