#-------------------------------------------------------------------------------
# Name: pySaliencyMap
# Purpose: Extracting a saliency map from a single still image
#
# Author: Akisato Kimura <akisato@ieee.org>
#
# Created: April 24, 2014
# Copyright: (c) Akisato Kimura 2014-
# Licence: All rights reserved
#-------------------------------------------------------------------------------
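"""Bottom-up visual saliency estimation for a single still image.

pySaliencyMap extracts intensity, color, orientation, and (when successive
frames are supplied) motion feature maps, normalizes them into conspicuity
maps, and combines them with the weights defined in pySaliencyMapDefs into a
single saliency map. Helper methods binarize the map with Otsu thresholding
and extract the salient region with GrabCut.
"""
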
import cv2
import numpy as np
import SaRa.pySaliencyMapDefs as pySaliencyMapDefs
import time

class pySaliencyMap:
    # initialization
    def __init__(self, width, height):
        self.width = width
        self.height = height
        self.prev_frame = None
        self.SM = None
        self.GaborKernel0 = np.array(pySaliencyMapDefs.GaborKernel_0)
        self.GaborKernel45 = np.array(pySaliencyMapDefs.GaborKernel_45)
        self.GaborKernel90 = np.array(pySaliencyMapDefs.GaborKernel_90)
        self.GaborKernel135 = np.array(pySaliencyMapDefs.GaborKernel_135)

    # extracting color channels
    def SMExtractRGBI(self, inputImage):
        # convert scale of array elements
        src = np.float32(inputImage) * 1./255
        # split
        (B, G, R) = cv2.split(src)
        # extract an intensity image
        I = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
        # return
        return R, G, B, I

    # feature maps
    ## constructing a Gaussian pyramid
    def FMCreateGaussianPyr(self, src):
        dst = list()
        dst.append(src)
        for i in range(1, 9):
            nowdst = cv2.pyrDown(dst[i-1])
            dst.append(nowdst)
        return dst

    ## taking center-surround differences
    def FMCenterSurroundDiff(self, GaussianMaps):
        dst = list()
        for s in range(2, 5):
            now_size = GaussianMaps[s].shape
            now_size = (now_size[1], now_size[0])  # (width, height)
            tmp = cv2.resize(GaussianMaps[s+3], now_size, interpolation=cv2.INTER_LINEAR)
            nowdst = cv2.absdiff(GaussianMaps[s], tmp)
            dst.append(nowdst)
            tmp = cv2.resize(GaussianMaps[s+4], now_size, interpolation=cv2.INTER_LINEAR)
            nowdst = cv2.absdiff(GaussianMaps[s], tmp)
            dst.append(nowdst)
        return dst

    ## constructing a Gaussian pyramid + taking center-surround differences
    def FMGaussianPyrCSD(self, src):
        GaussianMaps = self.FMCreateGaussianPyr(src)
        dst = self.FMCenterSurroundDiff(GaussianMaps)
        return dst

    ## intensity feature maps
    def IFMGetFM(self, I):
        return self.FMGaussianPyrCSD(I)

    ## color feature maps
    def CFMGetFM(self, R, G, B):
        # max(R,G,B)
        tmp1 = cv2.max(R, G)
        RGBMax = cv2.max(B, tmp1)
        RGBMax[RGBMax <= 0] = 0.0001  # prevent division by zero
        # min(R,G)
        RGMin = cv2.min(R, G)
        # RG = (R-G)/max(R,G,B)
        RG = (R - G) / RGBMax
        # BY = (B-min(R,G))/max(R,G,B)
        BY = (B - RGMin) / RGBMax
        # clamp negative values to 0
        RG[RG < 0] = 0
        BY[BY < 0] = 0
        # obtain feature maps in the same way as intensity
        RGFM = self.FMGaussianPyrCSD(RG)
        BYFM = self.FMGaussianPyrCSD(BY)
        # return
        return RGFM, BYFM

    ## orientation feature maps
    def OFMGetFM(self, src):
        # creating a Gaussian pyramid
        GaussianI = self.FMCreateGaussianPyr(src)
        # convolving Gabor filters with the intensity pyramid to extract orientation features
        GaborOutput0 = [np.empty((1, 1)), np.empty((1, 1))]  # dummy entries for pyramid levels 0 and 1
        GaborOutput45 = [np.empty((1, 1)), np.empty((1, 1))]
        GaborOutput90 = [np.empty((1, 1)), np.empty((1, 1))]
        GaborOutput135 = [np.empty((1, 1)), np.empty((1, 1))]
        for j in range(2, 9):
            GaborOutput0.append(cv2.filter2D(GaussianI[j], cv2.CV_32F, self.GaborKernel0))
            GaborOutput45.append(cv2.filter2D(GaussianI[j], cv2.CV_32F, self.GaborKernel45))
            GaborOutput90.append(cv2.filter2D(GaussianI[j], cv2.CV_32F, self.GaborKernel90))
            GaborOutput135.append(cv2.filter2D(GaussianI[j], cv2.CV_32F, self.GaborKernel135))
        # calculating center-surround differences for every orientation
        CSD0 = self.FMCenterSurroundDiff(GaborOutput0)
        CSD45 = self.FMCenterSurroundDiff(GaborOutput45)
        CSD90 = self.FMCenterSurroundDiff(GaborOutput90)
        CSD135 = self.FMCenterSurroundDiff(GaborOutput135)
        # concatenate
        dst = list(CSD0)
        dst.extend(CSD45)
        dst.extend(CSD90)
        dst.extend(CSD135)
        # return
        return dst

    ## motion feature maps
    def MFMGetFM(self, src):
        # convert scale
        I8U = np.uint8(255 * src)
        # calculating optical flow against the previous frame, if any
        if self.prev_frame is not None:
            farne_pyr_scale = pySaliencyMapDefs.farne_pyr_scale
            farne_levels = pySaliencyMapDefs.farne_levels
            farne_winsize = pySaliencyMapDefs.farne_winsize
            farne_iterations = pySaliencyMapDefs.farne_iterations
            farne_poly_n = pySaliencyMapDefs.farne_poly_n
            farne_poly_sigma = pySaliencyMapDefs.farne_poly_sigma
            farne_flags = pySaliencyMapDefs.farne_flags
            flow = cv2.calcOpticalFlowFarneback(
                prev=self.prev_frame,
                next=I8U,
                flow=None,
                pyr_scale=farne_pyr_scale,
                levels=farne_levels,
                winsize=farne_winsize,
                iterations=farne_iterations,
                poly_n=farne_poly_n,
                poly_sigma=farne_poly_sigma,
                flags=farne_flags,
            )
            flowx = flow[..., 0]
            flowy = flow[..., 1]
        else:
            flowx = np.zeros(I8U.shape)
            flowy = np.zeros(I8U.shape)
        # create Gaussian pyramids
        dst_x = self.FMGaussianPyrCSD(flowx)
        dst_y = self.FMGaussianPyrCSD(flowy)
        # update the current frame
        self.prev_frame = np.uint8(I8U)
        # return
        return dst_x, dst_y

    # conspicuity maps
    ## standard range normalization
    def SMRangeNormalize(self, src):
        minn, maxx, dummy1, dummy2 = cv2.minMaxLoc(src)
        if maxx != minn:
            # equivalent to (src - minn) / (maxx - minn), i.e. rescaling to [0, 1]
            dst = src/(maxx-minn) + minn/(minn-maxx)
        else:
            dst = src - minn
        return dst

    ## computing an average of local maxima
    def SMAvgLocalMax(self, src):
        # size
        stepsize = pySaliencyMapDefs.default_step_local
        width = src.shape[1]
        height = src.shape[0]
        # find local maxima
        numlocal = 0
        lmaxmean = 0
        for y in range(0, height-stepsize, stepsize):
            for x in range(0, width-stepsize, stepsize):
                localimg = src[y:y+stepsize, x:x+stepsize]
                lmin, lmax, dummy1, dummy2 = cv2.minMaxLoc(localimg)
                lmaxmean += lmax
                numlocal += 1
        # averaging over all the local regions (guarding against numlocal == 0)
        if numlocal == 0:
            return 0
        else:
            return lmaxmean / numlocal

    ## normalization specific to the saliency map model
    def SMNormalization(self, src):
        dst = self.SMRangeNormalize(src)
        lmaxmean = self.SMAvgLocalMax(dst)
        normcoeff = (1-lmaxmean)*(1-lmaxmean)
        return dst * normcoeff

    ## normalizing feature maps
    def normalizeFeatureMaps(self, FM):
        NFM = list()
        for i in range(0, 6):
            normalizedImage = self.SMNormalization(FM[i])
            nownfm = cv2.resize(normalizedImage, (self.width, self.height), interpolation=cv2.INTER_LINEAR)
            NFM.append(nownfm)
        return NFM

    ## intensity conspicuity map
    def ICMGetCM(self, IFM):
        NIFM = self.normalizeFeatureMaps(IFM)
        ICM = sum(NIFM)
        return ICM

    ## color conspicuity map
    def CCMGetCM(self, CFM_RG, CFM_BY):
        # extracting a conspicuity map for every color opponent pair
        CCM_RG = self.ICMGetCM(CFM_RG)
        CCM_BY = self.ICMGetCM(CFM_BY)
        # merge
        CCM = CCM_RG + CCM_BY
        # return
        return CCM

    ## orientation conspicuity map
    def OCMGetCM(self, OFM):
        OCM = np.zeros((self.height, self.width))
        for i in range(0, 4):
            # slicing
            nowofm = OFM[i*6:(i+1)*6]  # angle = i*45
            # extracting a conspicuity map for every angle
            NOFM = self.ICMGetCM(nowofm)
            # normalize
            NOFM2 = self.SMNormalization(NOFM)
            # accumulate
            OCM += NOFM2
        return OCM

    ## motion conspicuity map
    def MCMGetCM(self, MFM_X, MFM_Y):
        return self.CCMGetCM(MFM_X, MFM_Y)

    # core
    def SMGetSM(self, src):
        # definitions
        size = src.shape
        width = size[1]
        height = size[0]
        # check
        # if(width != self.width or height != self.height):
        #     sys.exit("size mismatch")
        # extracting individual color channels
        R, G, B, I = self.SMExtractRGBI(src)
        # extracting feature maps
        IFM = self.IFMGetFM(I)
        CFM_RG, CFM_BY = self.CFMGetFM(R, G, B)
        OFM = self.OFMGetFM(I)
        MFM_X, MFM_Y = self.MFMGetFM(I)
        # extracting conspicuity maps
        ICM = self.ICMGetCM(IFM)
        CCM = self.CCMGetCM(CFM_RG, CFM_BY)
        OCM = self.OCMGetCM(OFM)
        MCM = self.MCMGetCM(MFM_X, MFM_Y)
        # adding all the conspicuity maps to form a saliency map
        wi = pySaliencyMapDefs.weight_intensity
        wc = pySaliencyMapDefs.weight_color
        wo = pySaliencyMapDefs.weight_orientation
        wm = pySaliencyMapDefs.weight_motion
        SMMat = wi*ICM + wc*CCM + wo*OCM + wm*MCM
        # normalize
        normalizedSM = self.SMRangeNormalize(SMMat)
        normalizedSM2 = normalizedSM.astype(np.float32)
        smoothedSM = cv2.bilateralFilter(normalizedSM2, 7, 3, 1.55)
        self.SM = cv2.resize(smoothedSM, (width, height), interpolation=cv2.INTER_NEAREST)
        # return
        return self.SM

    def SMGetBinarizedSM(self, src):
        # get a saliency map
        if self.SM is None:
            self.SM = self.SMGetSM(src)
        # convert scale
        SM_I8U = np.uint8(255 * self.SM)
        # binarize
        thresh, binarized_SM = cv2.threshold(SM_I8U, thresh=0, maxval=255, type=cv2.THRESH_BINARY+cv2.THRESH_OTSU)
        return binarized_SM

    def SMGetSalientRegion(self, src):
        # get a binarized saliency map
        binarized_SM = self.SMGetBinarizedSM(src)
        # GrabCut
        img = src.copy()
        mask = np.where((binarized_SM != 0), cv2.GC_PR_FGD, cv2.GC_PR_BGD).astype('uint8')
        bgdmodel = np.zeros((1, 65), np.float64)
        fgdmodel = np.zeros((1, 65), np.float64)
        rect = (0, 0, 1, 1)  # dummy
        iterCount = 1
        cv2.grabCut(img, mask=mask, rect=rect, bgdModel=bgdmodel, fgdModel=fgdmodel, iterCount=iterCount, mode=cv2.GC_INIT_WITH_MASK)
        # post-processing
        mask_out = np.where((mask == cv2.GC_FGD) + (mask == cv2.GC_PR_FGD), 255, 0).astype('uint8')
        output = cv2.bitwise_and(img, img, mask=mask_out)
        return output
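
# ------------------------------------------------------------------------------
# Minimal usage sketch (illustrative addition, not part of the original module).
# It assumes an image file named "test.png" exists in the working directory;
# the input and output filenames below are placeholders.
# ------------------------------------------------------------------------------
if __name__ == '__main__':
    img = cv2.imread('test.png')                     # BGR uint8 image
    if img is None:
        raise SystemExit('test.png not found')
    height, width = img.shape[:2]
    sm = pySaliencyMap(width, height)                # instantiate with the frame size
    saliency_map = sm.SMGetSM(img)                   # float map, range-normalized to [0, 1]
    binarized = sm.SMGetBinarizedSM(img)             # uint8 map after Otsu thresholding
    salient_region = sm.SMGetSalientRegion(img)      # GrabCut-refined salient region
    cv2.imwrite('saliency_map.png', np.uint8(255 * saliency_map))
    cv2.imwrite('binarized_map.png', binarized)
    cv2.imwrite('salient_region.png', salient_region)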