Rauhan committed on
Commit
c603fb2
1 Parent(s): 8f4f425

UPDATE: New Endpoints

Browse files
EasyOCRModels/craft_mlt_25k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a5efbfb48b4081100544e75e1e2b57f8de3d84f213004b14b85fd4b3748db17
3
+ size 83152330
EasyOCRModels/english_g2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2272681d9d67a04e2dff396b6e95077bc19001f8f6d3593c307b9852e1c29e8
3
+ size 15143997
functions.py CHANGED
@@ -40,6 +40,7 @@ vectorEmbeddings = HuggingFaceEmbeddings(
40
  model_kwargs = model_kwargs,
41
  encode_kwargs = encode_kwargs
42
  )
 
43
  sparseEmbeddings = FastEmbedSparse(model = "Qdrant/BM25")
44
  prompt = """
45
  INSTRUCTIONS:
@@ -289,7 +290,7 @@ def getLinks(url: str, timeout = 30):
289
 
290
 
291
  def getTextFromImagePDF(pdfBytes):
292
- reader = easyocr.Reader(['ch_sim','en'], gpu = True)
293
  allImages = convert_from_bytes(pdfBytes)
294
  allImages = [np.array(image) for image in allImages]
295
  text = "\n\n\n".join(["\n".join([text[1] for text in reader.readtext(image, paragraph=True)]) for image in allImages])
 
40
  model_kwargs = model_kwargs,
41
  encode_kwargs = encode_kwargs
42
  )
43
+ reader = easyocr.Reader(['en'], gpu = True, model_storage_directory = "/app/EasyOCRModels")
44
  sparseEmbeddings = FastEmbedSparse(model = "Qdrant/BM25")
45
  prompt = """
46
  INSTRUCTIONS:
 
290
 
291
 
292
  def getTextFromImagePDF(pdfBytes):
293
+ global reader
294
  allImages = convert_from_bytes(pdfBytes)
295
  allImages = [np.array(image) for image in allImages]
296
  text = "\n\n\n".join(["\n".join([text[1] for text in reader.readtext(image, paragraph=True)]) for image in allImages])