Daryl Fung committed on
Commit
9e61b41
1 Parent(s): 01ea445

fix bar plot

Browse files
keyphrase_extraction.py CHANGED
@@ -34,8 +34,8 @@ def get_top_key_phrases(text, top_n, save_output):
34
  scores = list(zip(*phrases_ranking.items()))[1]
35
  keyword_df = pd.DataFrame({'words': phrases[:top_n], 'scores': scores[:top_n]})
36
  plt.title("Word Count")
37
- plt.figure(figsize=(8, 24))
38
- sns.catplot(data=keyword_df, x='words', y='scores', kind='bar', palette='blend:#7AB,#EDA', aspect=1.5)
39
  plt.xticks(rotation=-10, fontsize=6)
40
  plt.savefig(save_output, dpi=300)
41
  plt.close()
 
34
  scores = list(zip(*phrases_ranking.items()))[1]
35
  keyword_df = pd.DataFrame({'words': phrases[:top_n], 'scores': scores[:top_n]})
36
  plt.title("Word Count")
37
+ plt.figure(figsize=(24, 8))
38
+ sns.barplot(data=keyword_df, y='words', x='scores', palette='blend:#7AB,#EDA', aspect=1.5)
39
  plt.xticks(rotation=-10, fontsize=6)
40
  plt.savefig(save_output, dpi=300)
41
  plt.close()
keyword_extraction.py CHANGED
@@ -37,8 +37,8 @@ def keyword_extract(doc, kw_model, n_grams, save_output='results/'):
37
  scores = list(zip(*keyword_onegram))[1]
38
  keyword_df = pd.DataFrame({'words': words, 'scores': scores})
39
  plt.title("Word Count")
40
- plt.figure(figsize=(8, 24))
41
- sns.catplot(data=keyword_df, x='words', y='scores', kind='bar', palette='blend:#7AB,#EDA', aspect=1.5)
42
  plt.xticks(rotation=-10, fontsize=8-n_grams)
43
  plt.savefig(save_output, dpi=300)
44
  plt.close()
 
37
  scores = list(zip(*keyword_onegram))[1]
38
  keyword_df = pd.DataFrame({'words': words, 'scores': scores})
39
  plt.title("Word Count")
40
+ plt.figure(figsize=(24, 8))
41
+ sns.barplot(data=keyword_df, y='words', x='scores', palette='blend:#7AB,#EDA', aspect=1.5)
42
  plt.xticks(rotation=-10, fontsize=8-n_grams)
43
  plt.savefig(save_output, dpi=300)
44
  plt.close()
main.py CHANGED
@@ -71,9 +71,9 @@ filename.open('w', encoding='utf-8').write(svg)
71
 
72
  # run keyword extraction
73
  kw_model = KeyBERT()
74
- keyword_extract(descriptions, kw_model, 1, save_output=f'results/DAD/{1}_keyword.png')
75
- keyword_extract(descriptions, kw_model, 2, save_output=f'results/DAD/{2}_keyword.png')
76
- keyword_extract(descriptions, kw_model, 3, save_output=f'results/DAD/{3}_keyword.png')
77
  keywords = kw_model.extract_keywords(doc, highlight=True)
78
  print(keywords)
79
 
 
71
 
72
  # run keyword extraction
73
  kw_model = KeyBERT()
74
+ keyword_extract(doc, kw_model, 1, save_output=f'results/DAD/{1}_keyword.png')
75
+ keyword_extract(doc, kw_model, 2, save_output=f'results/DAD/{2}_keyword.png')
76
+ keyword_extract(doc, kw_model, 3, save_output=f'results/DAD/{3}_keyword.png')
77
  keywords = kw_model.extract_keywords(doc, highlight=True)
78
  print(keywords)
79
 
results/DAD/1_gram.png CHANGED
results/DAD/2_gram.png CHANGED
results/DAD/3_gram.png CHANGED
results/DAD/ner.html CHANGED
@@ -6,7 +6,7 @@
6
 
7
  <body style="font-size: 16px; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; padding: 4rem 2rem; direction: ltr">
8
  <figure style="margin-bottom: 6rem">
9
- <div class="entities" style="line-height: 2.5; direction: ltr">CIHI</br>Database that collects administrative clinical and demographic information on hospital discharges including deaths signouts and transfers Some provinces and territories also use the
10
  <mark class="entity" style="background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
11
  DAD
12
  <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">ORG</span>
@@ -211,7 +211,7 @@
211
  Captures
212
  <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">PERSON</span>
213
  </mark>
214
- administrative clinical and demographic information on discharges for acute care facilities including deaths signouts and transfers</br></div>
215
  </figure>
216
  </body>
217
  </html>
 
6
 
7
  <body style="font-size: 16px; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; padding: 4rem 2rem; direction: ltr">
8
  <figure style="margin-bottom: 6rem">
9
+ <div class="entities" style="line-height: 2.5; direction: ltr"></br>CIHI</br>Database that collects administrative clinical and demographic information on hospital discharges including deaths signouts and transfers Some provinces and territories also use the
10
  <mark class="entity" style="background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
11
  DAD
12
  <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">ORG</span>
 
211
  Captures
212
  <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">PERSON</span>
213
  </mark>
214
+ administrative clinical and demographic information on discharges for acute care facilities including deaths signouts and transfers</br> </div>
215
  </figure>
216
  </body>
217
  </html>
word.py CHANGED
@@ -33,8 +33,8 @@ def show_gram_plot(doc, n_grams, top_words=10, save_output='results/'):
33
  words, count = zip(*count_tokens)
34
  tokens_df = pd.DataFrame({'word': words, 'count': count})
35
  plt.title("Word Count")
36
- plt.figure(figsize=(8, 24))
37
- sns.catplot(data=tokens_df, x='word', y='count', kind='bar', palette='blend:#7AB,#EDA')
38
  plt.xticks(rotation=-10, fontsize=8-n_grams)
39
  plt.savefig(save_output, dpi=300)
40
  plt.close()
 
33
  words, count = zip(*count_tokens)
34
  tokens_df = pd.DataFrame({'word': words, 'count': count})
35
  plt.title("Word Count")
36
+ plt.figure(figsize=(24, 8))
37
+ sns.barplot(data=tokens_df, y='word', x='count', palette='blend:#7AB,#EDA')
38
  plt.xticks(rotation=-10, fontsize=8-n_grams)
39
  plt.savefig(save_output, dpi=300)
40
  plt.close()