|
import time |
|
import numpy as np |
|
from PIL import Image |
|
from scipy.spatial.distance import cdist |
|
from scipy.optimize import linear_sum_assignment |
|
|
|
|
|
class SimpleAffineTransform:
    """Simple affine transform limited to a translation and a uniform scale.

    The scale is applied about the centroid of the coordinates passed to
    ``__call__`` (not about the origin); the translation is added afterwards.
    """

    def __init__(self, translation=(0, 0), scale=1.0):
        """Store the translation (converted to an ndarray) and the scale factor."""
        self.translation = np.array(translation)
        self.scale = scale

    def estimate(self, src, dst):
        """Fit translation and scale so that ``src`` maps onto ``dst``.

        The translation is the difference between the two centroids. The
        scale is the ratio of the mean point-to-centroid distances
        (``dst`` over ``src``).

        Updates ``self.translation`` and ``self.scale`` in place and returns
        ``None``.
        """
        src_center = np.mean(src, axis=0)
        dst_center = np.mean(dst, axis=0)
        self.translation = dst_center - src_center

        src_dists = np.linalg.norm(src - src_center, axis=1)
        dst_dists = np.linalg.norm(dst - dst_center, axis=1)
        # The epsilon avoids a division by zero when every source point
        # coincides with the source centroid.
        self.scale = np.mean(dst_dists) / (np.mean(src_dists) + 1e-10)

    def inverse(self):
        """Return a new transform with negated translation and reciprocal scale.

        Applying the result to the full output of ``self(coords)`` recovers
        ``coords``, because the output centroid is the input centroid shifted
        by ``self.translation``.
        """
        # The original referenced ``AffineTransform``, a name that is not
        # defined in this module; the inverse is another instance of this class.
        return SimpleAffineTransform(-self.translation, 1.0 / self.scale)

    def __call__(self, coords):
        """Scale ``coords`` about their own centroid, then translate them."""
        center = np.mean(coords, axis=0)
        return self.scale * (coords - center) + center + self.translation

    def residuals(self, src, dst):
        """Return the per-point Euclidean distance between ``self(src)`` and ``dst``."""
        return np.sqrt(np.sum((self(src) - dst) ** 2, axis=1))
|
|
|
|
|
def norm_coords(x, left, right):
    """Clamp ``x`` to ``[left, right]``; the lower bound is checked first."""
    return left if x < left else (right if x > right else x)
|
|
|
def norm_same_token(token): |
|
special_map = { |
|
"\\cdot": ".", |
|
"\\mid": "|", |
|
"\\to": "\\rightarrow", |
|
"\\top": "T", |
|
"\\Tilde": "\\tilde", |
|
"\\cdots": "\\dots", |
|
"\\prime": "'", |
|
"\\ast": "*", |
|
"\\left<": "\\langle", |
|
"\\right>": "\\rangle" |
|
} |
|
if token in special_map.keys(): |
|
token = special_map[token] |
|
if token.startswith('\\left') or token.startswith('\\right'): |
|
token = token.replace("\\left", "").replace("\\right", "") |
|
if token.startswith('\\big') or token.startswith('\\Big'): |
|
if "\\" in token[4:]: |
|
token = "\\"+token[4:].split("\\")[-1] |
|
else: |
|
token = token[-1] |
|
|
|
if token in ['\\leq', '\\geq']: |
|
return token[0:-1] |
|
if token in ['\\lVert', '\\rVert', '\\Vert']: |
|
return '\\|' |
|
if token in ['\\lvert', '\\rvert', '\\vert']: |
|
return '|' |
|
if token.endswith("rightarrow"): |
|
return "\\rightarrow" |
|
if token.endswith("leftarrow"): |
|
return "\\leftarrow" |
|
if token.startswith('\\wide'): |
|
return token.replace("wide", "") |
|
if token.startswith('\\var'): |
|
return token.replace("\\var", "") |
|
return token |
|
|
|
|
|
class HungarianMatcher:
    """Match ground-truth boxes to predicted boxes with the Hungarian algorithm.

    Each box is a dict with a ``'token'`` entry and a ``'bbox'`` entry
    (``x_min, y_min, x_max, y_max``). The assignment minimizes a weighted sum
    of three pairwise costs: token mismatch, normalized box position (L1) and
    relative reading order (L1). The individual cost matrices of the last
    call are kept in ``self.cost`` under ``"token"``, ``"position"`` and
    ``"order"``.
    """

    def __init__(
        self,
        cost_token: float = 1,
        cost_position: float = 0.05,
        cost_order: float = 0.15,
    ):
        """Store the weights applied to the token, position and order costs."""
        self.cost_token = cost_token
        self.cost_position = cost_position
        self.cost_order = cost_order
        self.cost = {}

    def calculate_token_cost_old(self, box_gt, box_pred):
        """Loop-based token cost, shape ``(len(box_gt), len(box_pred))``.

        0 for identical tokens, 0.05 for tokens that are equal only after
        ``norm_same_token``, 1 otherwise.
        """
        token_cost = np.ones((len(box_gt), len(box_pred)))
        for i, gt_box in enumerate(box_gt):
            for j, pred_box in enumerate(box_pred):
                if gt_box['token'] == pred_box['token']:
                    token_cost[i, j] = 0
                elif norm_same_token(gt_box['token']) == norm_same_token(pred_box['token']):
                    token_cost[i, j] = 0.05
        return np.array(token_cost)

    @staticmethod
    def _token_ids(tokens, vocab):
        """Encode tokens as integer ids, extending ``vocab`` with unseen tokens."""
        return np.array(
            [vocab.setdefault(tok, len(vocab)) for tok in tokens], dtype=np.int64
        )

    def calculate_token_cost(self, box_gt, box_pred):
        """Vectorized token cost, shape ``(len(box_gt), len(box_pred))``.

        0 for identical tokens, 0.05 for tokens that are equal only after
        ``norm_same_token``, 1 otherwise.
        """
        gt_tokens = [box['token'] for box in box_gt]
        pred_tokens = [box['token'] for box in box_pred]

        raw_vocab = {}
        gt_ids = self._token_ids(gt_tokens, raw_vocab)
        pred_ids = self._token_ids(pred_tokens, raw_vocab)

        norm_vocab = {}
        gt_norm_ids = self._token_ids(
            [norm_same_token(tok) for tok in gt_tokens], norm_vocab
        )
        pred_norm_ids = self._token_ids(
            [norm_same_token(tok) for tok in pred_tokens], norm_vocab
        )

        # Broadcast id comparison replaces the one-hot matrices: same values,
        # without allocating an (n_pred, n_classes) array.
        exact = gt_ids[:, None] == pred_ids[None, :]
        similar = gt_norm_ids[:, None] == pred_norm_ids[None, :]

        token_cost = np.ones(exact.shape)
        token_cost[similar] = 0.05
        # Written last so exact matches override the "similar" cost.
        token_cost[exact] = 0.0
        return token_cost

    def box2array(self, box_list, size):
        """Return an ``(n, 4)`` array of boxes divided by the ``(W, H)`` in ``size``."""
        W, H = size
        box_array = []
        for box in box_list:
            x_min, y_min, x_max, y_max = box['bbox']
            box_array.append([x_min / W, y_min / H, x_max / W, y_max / H])
        # reshape keeps the result 2-D even when box_list is empty.
        return np.array(box_array).reshape(-1, 4)

    def order2array(self, box_list):
        """Return an ``(n, 1)`` array holding each box's index divided by ``n``."""
        count = len(box_list)
        order_array = [[idx / count] for idx in range(count)]
        # reshape keeps the result 2-D even when box_list is empty.
        return np.array(order_array).reshape(-1, 1)

    def calculate_l1_cost(self, gt_array, pred_array):
        """Pairwise L1 distance divided by the number of feature columns."""
        scale = gt_array.shape[-1]
        l1_cost = cdist(gt_array, pred_array, 'minkowski', p=1)
        return l1_cost / scale

    def __call__(self, box_gt, box_pred, gt_size, pred_size):
        """Compute the minimum-cost assignment between the two box lists.

        Args:
            box_gt: Ground-truth box dicts.
            box_pred: Predicted box dicts.
            gt_size: ``(W, H)`` used to normalize ground-truth boxes.
            pred_size: ``(W, H)`` used to normalize predicted boxes.

        Returns:
            A list of ``(gt_index, pred_index)`` pairs. Empty when either
            input list is empty.
        """
        if not box_gt or not box_pred:
            # Nothing can be matched; previously this path raised inside the
            # cost computation because of 1-D empty arrays.
            empty = np.zeros((len(box_gt), len(box_pred)))
            self.cost["token"] = empty
            self.cost["position"] = empty.copy()
            self.cost["order"] = empty.copy()
            return []

        gt_box_array = self.box2array(box_gt, gt_size)
        pred_box_array = self.box2array(box_pred, pred_size)
        gt_order_array = self.order2array(box_gt)
        pred_order_array = self.order2array(box_pred)

        token_cost = self.calculate_token_cost(box_gt, box_pred)
        position_cost = self.calculate_l1_cost(gt_box_array, pred_box_array)
        order_cost = self.calculate_l1_cost(gt_order_array, pred_order_array)

        self.cost["token"] = token_cost
        self.cost["position"] = position_cost
        self.cost["order"] = order_cost

        cost = (
            self.cost_token * token_cost
            + self.cost_position * position_cost
            + self.cost_order * order_cost
        )
        # linear_sum_assignment rejects NaN entries, so non-finite costs are
        # replaced with a large finite penalty.
        cost[~np.isfinite(cost)] = 100
        gt_idx, pred_idx = linear_sum_assignment(cost)
        return list(zip(gt_idx, pred_idx))