davila7 committed on
Commit
bbc90a3
1 Parent(s): b31da5e

more types

Browse files
Files changed (2) hide show
  1. app.py +4 -3
  2. utils.py +5 -0
app.py CHANGED
@@ -40,7 +40,7 @@ with st.sidebar:
40
 
41
  uploaded_file = st.file_uploader(
42
  "Upload a pdf, docx, or txt file",
43
- type=["pdf", "docx", "txt", "csv", "pptx"],
44
  help="Scanned documents are not supported yet!",
45
  on_change=clear_submit,
46
  )
@@ -57,8 +57,9 @@ with st.sidebar:
57
  elif uploaded_file.name.endswith(".pptx"):
58
  doc = parse_pptx(uploaded_file)
59
  else:
60
- st.error("File type not supported")
61
- doc = None
 
62
  text = text_to_docs(doc)
63
  st.write(text)
64
  try:
 
40
 
41
  uploaded_file = st.file_uploader(
42
  "Upload a pdf, docx, or txt file",
43
+ type=["pdf", "docx", "txt", "csv", "pptx", "js", "py", "json", "html", "css", "md"],
44
  help="Scanned documents are not supported yet!",
45
  on_change=clear_submit,
46
  )
 
57
  elif uploaded_file.name.endswith(".pptx"):
58
  doc = parse_pptx(uploaded_file)
59
  else:
60
+ doc = parse_any(uploaded_file)
61
+ #st.error("File type not supported")
62
+ #doc = None
63
  text = text_to_docs(doc)
64
  st.write(text)
65
  try:
utils.py CHANGED
@@ -82,6 +82,11 @@ def parse_csv(uploaded_file):
82
  # dataframe = pd.read_csv(uploaded_file)
83
  return string_data
84
 
 
 
 
 
 
85
 
86
  @st.cache(allow_output_mutation=True)
87
  def text_to_docs(text: str) -> List[Document]:
 
82
  # dataframe = pd.read_csv(uploaded_file)
83
  return string_data
84
 
85
+ @st.experimental_memo()
86
+ def parse_any(uploaded_file):
87
+ stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
88
+ string_data = stringio.read()
89
+ return string_data
90
 
91
  @st.cache(allow_output_mutation=True)
92
  def text_to_docs(text: str) -> List[Document]: