ljyflores committed on
Commit
850fcc9
1 Parent(s): 16939ac

Add cached files and update app

Browse files
__pycache__/utils_casemaker.cpython-310.pyc ADDED
Binary file (8.49 kB). View file
 
__pycache__/utils_report_parser.cpython-310.pyc ADDED
Binary file (582 Bytes). View file
 
app.py CHANGED
@@ -20,7 +20,7 @@ if uploaded_file is not None:
20
  )
21
 
22
  patient_options = {
23
- f"Patient {patient_id}: {len(reports[patient_id])} reports": patient_id
24
  for patient_id in reports.keys()
25
  }
26
  selected_patient_string = st.radio(
@@ -34,7 +34,17 @@ if uploaded_file is not None:
34
  summary_by_organ = casemaker.parse_records(reports[selected_patient_id])
35
  summary_by_organ = casemaker.format_reports(summary_by_organ)
36
 
37
- for chosen_organ in summary_by_organ.keys():
38
- if summary_by_organ[chosen_organ]:
39
- st.header(chosen_organ.capitalize())
40
- st.write(summary_by_organ[chosen_organ])
 
 
 
 
 
 
 
 
 
 
 
20
  )
21
 
22
  patient_options = {
23
+ f"Patient {patient_id} ({len(reports[patient_id])} reports)": patient_id
24
  for patient_id in reports.keys()
25
  }
26
  selected_patient_string = st.radio(
 
34
  summary_by_organ = casemaker.parse_records(reports[selected_patient_id])
35
  summary_by_organ = casemaker.format_reports(summary_by_organ)
36
 
37
+ # Display the report
38
+ col1, col2 = st.columns(2)
39
+ with col1:
40
+ st.subheader("Original")
41
+ for report in reports[selected_patient_id]:
42
+ st.write(f"**Report {report.date}**")
43
+ st.write(report.text)
44
+
45
+ with col2:
46
+ st.subheader("With Casemaker")
47
+ for chosen_organ in summary_by_organ.keys():
48
+ if summary_by_organ[chosen_organ]:
49
+ st.header(chosen_organ.capitalize())
50
+ st.write(summary_by_organ[chosen_organ])
utils_casemaker.py CHANGED
@@ -16,12 +16,14 @@ from transformers import (
16
  pipeline,
17
  )
18
 
 
19
  @dataclass
20
  class Report:
21
- patient_id: str|int
22
  text: str
23
  date: str
24
- summary: str|None = None
 
25
 
26
  def clean(s: str) -> str:
27
  s = s.replace("\n", " ") # Concatenate into one string
@@ -64,7 +66,7 @@ def format_casemaker_data(
64
  .groupby("patient_id")
65
  .apply(lambda df: df[["patient_id", "text", "date"]].to_dict("records"))
66
  )
67
- reports_by_patient = dict[str,Sequence[Report]]()
68
  for patient_id, report_list in zip(df.index, df):
69
  patient_id = str(patient_id)
70
  report_list = [Report(**report) for report in report_list]
@@ -151,11 +153,8 @@ class CaseMaker:
151
  return report_string_by_organ
152
 
153
  def trim_to_relevant_portion(self, report: str):
154
- # Cut the report to the findings
155
- report = get_section_from_report(report, "findings")
156
-
157
  # Only keep sentences with symptoms and disease descriptions
158
- relevant_sentences = []
159
  for sentence in sent_tokenize(report):
160
  if any(
161
  [
@@ -163,7 +162,7 @@ class CaseMaker:
163
  for ent in self.ner_pipe(sentence)
164
  ]
165
  ):
166
- relevant_sentences.append(sentence)
167
  return "\n".join(relevant_sentences)
168
 
169
  def summarize_report(self, text: str) -> str:
@@ -219,12 +218,18 @@ class CaseMaker:
219
  must contain "text" and "date" keys
220
  """
221
 
222
- # For each organ, collect a list of relevant records containing the text and date
223
  reports_by_organ = dict[str, Sequence[Report]]()
224
  for report in reports:
225
- report_by_organ = self.parse_report_by_organ(report.text)
 
 
 
 
226
  for organ, report_text in report_by_organ.items():
227
- organ_level_record = Report(text=report_text, date=report.date, patient_id=report.patient_id)
 
 
228
  if organ in reports_by_organ:
229
  reports_by_organ[organ].append(organ_level_record)
230
  else:
 
16
  pipeline,
17
  )
18
 
19
+
20
  @dataclass
21
  class Report:
22
+ patient_id: str | int
23
  text: str
24
  date: str
25
+ summary: str | None = None
26
+
27
 
28
  def clean(s: str) -> str:
29
  s = s.replace("\n", " ") # Concatenate into one string
 
66
  .groupby("patient_id")
67
  .apply(lambda df: df[["patient_id", "text", "date"]].to_dict("records"))
68
  )
69
+ reports_by_patient = dict[str, Sequence[Report]]()
70
  for patient_id, report_list in zip(df.index, df):
71
  patient_id = str(patient_id)
72
  report_list = [Report(**report) for report in report_list]
 
153
  return report_string_by_organ
154
 
155
  def trim_to_relevant_portion(self, report: str):
 
 
 
156
  # Only keep sentences with symptoms and disease descriptions
157
+ relevant_sentences = list[str]()
158
  for sentence in sent_tokenize(report):
159
  if any(
160
  [
 
162
  for ent in self.ner_pipe(sentence)
163
  ]
164
  ):
165
+ relevant_sentences.append(str(sentence))
166
  return "\n".join(relevant_sentences)
167
 
168
  def summarize_report(self, text: str) -> str:
 
218
  must contain "text" and "date" keys
219
  """
220
 
221
+ # Split the reports by organ
222
  reports_by_organ = dict[str, Sequence[Report]]()
223
  for report in reports:
224
+ # Cut the report to the findings
225
+ report_findings = get_section_from_report(report.text, "findings")
226
+
227
+ # For each organ, collect a list of relevant records containing the text and date
228
+ report_by_organ = self.parse_report_by_organ(report_findings)
229
  for organ, report_text in report_by_organ.items():
230
+ organ_level_record = Report(
231
+ text=report_text, date=report.date, patient_id=report.patient_id
232
+ )
233
  if organ in reports_by_organ:
234
  reports_by_organ[organ].append(organ_level_record)
235
  else: