Spaces:
Sleeping
Sleeping
File size: 2,803 Bytes
c1b4f26 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import logging

import numpy as np
import requests
from paddleocr import PaddleOCR
from PIL import Image, ImageTransform
from vietocr.tool.config import Cfg
from vietocr.tool.predictor import Predictor

from utils.config import Config
class OCRDetector:
    """Two-stage OCR pipeline.

    ``PaddleOCR`` is used only to locate text regions; each region is then
    cropped from the image and read by a VietOCR ``Predictor``.
    """

    # Seconds to wait on the remote host when an image is fetched by URL.
    REQUEST_TIMEOUT = 30

    def __init__(self) -> None:
        """Build the PaddleOCR detector and the VietOCR recognizer.

        Both models are configured from ``Config`` (``Config.device`` and
        ``Config.ocr_path``).
        """
        self.paddle_ocr = PaddleOCR(
            lang='en',
            use_angle_cls=False,
            # Ask for the GPU only when the configured device is not the CPU
            # (the previous condition was inverted).
            use_gpu=Config.device != "cpu",
            show_log=False,
        )
        vietocr_config = Cfg.load_config_from_name('vgg_transformer')
        vietocr_config['weights'] = Config.ocr_path
        vietocr_config['cnn']['pretrained'] = False
        vietocr_config['device'] = Config.device
        vietocr_config['predictor']['beamsearch'] = False
        self.viet_ocr = Predictor(vietocr_config)

    def find_box(self, image):
        """Locate text boxes with the PaddleOCR model (detection only).

        Args:
            image: Input forwarded unchanged to ``PaddleOCR.ocr``;
                ``text_detector`` passes the image path or URL here.

        Returns:
            Integer ``numpy`` array built from the detections of the first
            image. When nothing was detected, an empty integer array of
            shape ``(0, 4, 2)`` is returned instead of raising.
        """
        result = self.paddle_ocr.ocr(image, cls=False, rec=False)
        # Only one image is submitted, so only the first entry is relevant.
        detections = result[0] if result else None
        # NOTE(review): PaddleOCR appears to report "no text found" as None
        # (or an empty list, depending on version) -- both are handled here.
        if detections is None or len(detections) == 0:
            return np.empty((0, 4, 2), dtype=int)
        return np.array(detections).astype(int)

    def cut_image_polygon(self, image, box):
        """Cut one quadrilateral text region out of ``image``.

        Args:
            image: Image object exposing ``transform`` (``text_detector``
                passes an RGB PIL image).
            box: Four ``(x, y)`` corners ordered top-left, top-right,
                bottom-right, bottom-left.

        Returns:
            The transformed crop, ``x2 - x1`` wide and ``y4 - y1`` high.
        """
        (x1, y1), (x2, y2), (x3, y3), (x4, y4) = box
        w = x2 - x1
        h = y4 - y1
        # Grow the quadrilateral by 1/7 of its height on every side so that
        # glyph edges are not clipped. Only the top-left corner is clamped
        # at 0; the other corners are left as computed.
        scl = h // 7
        new_box = (
            [max(x1 - scl, 0), max(y1 - scl, 0)],
            [x2 + scl, y2 - scl],
            [x3 + scl, y3 + scl],
            [x4 - scl, y4 + scl],
        )
        (x1, y1), (x2, y2), (x3, y3), (x4, y4) = new_box
        # QuadTransform takes an 8-tuple with the x,y coordinates of the
        # top-left, bottom-left, bottom-right and top-right corners.
        transform = [x1, y1, x4, y4, x3, y3, x2, y2]
        return image.transform((w, h), ImageTransform.QuadTransform(transform))

    def vietnamese_text(self, boxes, image):
        """Read the text inside every box with the VietOCR model.

        Args:
            boxes: Iterable of four-corner boxes as returned by ``find_box``.
            image: Image the boxes refer to.

        Returns:
            List of ``{"text", "score", "box"}`` dicts, one per box whose
            score is above ``Config.vietocr_threshold``. Boxes that fail to
            crop or to recognise are skipped.
        """
        # Read once up front so a configuration problem surfaces as an error
        # instead of silently discarding every box.
        threshold = Config.vietocr_threshold
        results = []
        for box in boxes:
            try:
                cut_image = self.cut_image_polygon(image, box)
                text, score = self.viet_ocr.predict(cut_image, return_prob=True)
            except Exception:
                # Best effort: one bad region must not abort the whole page,
                # but the failure is logged rather than hidden.
                logging.getLogger(__name__).warning(
                    "Skipping box %s: crop or recognition failed",
                    box, exc_info=True,
                )
                continue
            if score > threshold:
                results.append({"text": text,
                                "score": score,
                                "box": box})
        return results

    def text_detector(self, image_path):
        """Run detection and recognition on one image.

        Args:
            image_path: Local file path, or an ``http://`` / ``https://`` URL.

        Returns:
            The list produced by ``vietnamese_text``, or ``None`` when no box
            was detected or no text passed the score threshold.
        """
        if image_path.startswith(("http://", "https://")):
            # The context manager releases the connection; convert() has
            # already read the whole image by the time the block exits.
            with requests.get(image_path, stream=True,
                              timeout=self.REQUEST_TIMEOUT) as response:
                image = Image.open(response.raw).convert("RGB")
        else:
            with Image.open(image_path) as source:
                image = source.convert("RGB")
        # The detector receives the path/URL itself, not the decoded image.
        boxes = self.find_box(image_path)
        if boxes.size == 0:
            return None
        results = self.vietnamese_text(boxes, image)
        return results or None