
Image pre-processing for TF Serving via OpenCV, Pillow, TensorFlow tf.image.decode*, Keras-RetinaNet Example

--

Example Input Images

(all images are attributed to their owners and are shown here only for a technical demo)

640*480
1234*818
1280*918
8400*6777

Load via the RetinaNet helper

# for retinanet image processing
from timeit import default_timer as timer
from keras.preprocessing import image
from keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image
from keras_retinanet.utils.visualization import draw_box, draw_caption
from keras_retinanet.utils.colors import label_color

def decode_image_retinanet(img_path):
    ## Read the image via the retinanet helper - the original way
    start = timer()
    image = read_image_bgr(img_path)
    # preprocess image for network
    image = preprocess_image(image)
    image, scale = resize_image(image)
    end = timer()
    print("decode time=", end - start)
    # ('decode time=', 0.028119802474975586)
    # these are the best scores
    # ('Label', 'person', ' at ', array([409, 167, 728, 603]), ' Score ', 0.9681119)
    # ('Label', 'person', ' at ', array([ 0, 426, 512, 785]), ' Score ', 0.8355836)
    # ('Label', 'person', ' at ', array([ 723, 475, 1067, 791]), ' Score ', 0.72344124)
    # ('Label', 'tie', ' at ', array([527, 335, 569, 505]), ' Score ', 0.525432)
    return image, image
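Before the served model sees this, the decoded image is wrapped into a batch, which is where the (1, 800, 1067, 3) "Input shape" in the output below comes from. A rough sketch of that batching step plus a gRPC Predict call to TF Serving; the model name "retinanet", the input tensor name "input_image", the signature name and the port are assumptions here, not taken from the gist:

import grpc
import numpy as np
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc

def predict_via_tf_serving(image, host="localhost:8500"):
    # batch the (H, W, 3) image into the (1, H, W, 3) shape the model expects
    batched = np.expand_dims(image, axis=0).astype(np.float32)
    channel = grpc.insecure_channel(host)
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = "retinanet"            # assumed model name
    request.model_spec.signature_name = "serving_default"  # assumed signature
    request.inputs["input_image"].CopyFrom(          # assumed input tensor name
        tf.make_tensor_proto(batched, dtype=tf.float32))
    result = stub.Predict(request, 30.0)             # 30 s timeout
    return result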

Output

('Number of test=', 1)
('decode time=', 0.028119802474975586)
('in image shape', (800, 1067, 3))
('Input shape=', (1, 800, 1067, 3))
('in tf shape', (1, 800, 1067, 3))
('result no', 0)
('boxes output', (1, 300, 4))
('scores output', (1, 300))
('labels output', (1, 300))
('Label', 'person', ' at ', array([409, 167, 728, 603]), ' Score ', 0.9681119)
('Label', 'person', ' at ', array([ 0, 426, 512, 785]), ' Score ', 0.8355836)
('Label', 'person', ' at ', array([ 723, 475, 1067, 791]), ' Score ', 0.72344124)
('Label', 'tie', ' at ', array([527, 335, 569, 505]), ' Score ', 0.525432)
('Time for ', 1, ' is ', 0.8878581523895264)
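The draw_box, draw_caption and label_color imports above are what the keras-retinanet examples use to render detections like these. A minimal sketch of that drawing loop over the (1, 300, ...) boxes, scores and labels; the labels_to_names mapping (class id to name) and the 0.5 score threshold are assumptions here, not from the gist:

from keras_retinanet.utils.visualization import draw_box, draw_caption
from keras_retinanet.utils.colors import label_color

def draw_detections(draw_img, boxes, scores, labels, labels_to_names, threshold=0.5):
    # boxes/scores/labels have shapes (1, 300, 4), (1, 300), (1, 300) as above
    for box, score, label in zip(boxes[0], scores[0], labels[0]):
        if score < threshold:
            break  # detections come back sorted by score, so stop at the first low one
        color = label_color(label)
        b = box.astype(int)
        draw_box(draw_img, b, color=color)
        caption = "{} {:.3f}".format(labels_to_names[label], score)
        draw_caption(draw_img, b, caption)
    return draw_img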

Load via OpenCV

Code

Note: OpenCV reads images in BGR format; with swapRB=True it converts them to RGB. This causes variations in the detections. This setting was found to work well overall for the Keras RetinaNet model.

http://answers.opencv.org/question/174139/is-the-swaprb-value-in-the-example-googlenet-dnn-code-wrong/
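The swap itself is only a reordering of the channel axis; a small sanity check (illustrative, not from the original gist):

import cv2
import numpy as np

# BGR <-> RGB is just a reversal of the channel axis; both forms give identical arrays
bgr = np.random.randint(0, 256, (4, 4, 3), dtype=np.uint8)  # stand-in for cv2.imread(...)
assert np.array_equal(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB), bgr[..., ::-1])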

import cv2
import numpy as np
from timeit import default_timer as timer

def decode_image_opencv(img_path):
    ### Create the image vector via OpenCV
    # todo https://docs.nvidia.com/deeplearning/sdk/dali-developer-guide/docs/examples/getting%20started.html
    start = timer()
    image = cv2.imread(img_path, 1)
    image = image_resize(image, height=800)
    org = image
    image = cv2.dnn.blobFromImage(image, scalefactor=1.0,
                                  mean=(103.939, 116.779, 123.68), swapRB=True)
    # this gives shape as (1, 3, 480, 640)
    # we need it as ('Input shape=', (1, 480, 640, 3))
    image = np.transpose(image, (0, 2, 3, 1))
    #image = image.astype('f')
    #image = image - 127.5
    end = timer()
    print("decode time=", end - start)
    # ('decode time=', 0.007803916931152344)
    return image, org

# https://stackoverflow.com/a/44659589/429476
def image_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    # initialize the dimensions of the image to be resized and
    # grab the image size
    dim = None
    (h, w) = image.shape[:2]
    # if both the width and height are None, then return the
    # original image
    if width is None and height is None:
        return image
    # check to see if the width is None
    if width is None:
        # calculate the ratio of the height and construct the dimensions
        r = height / float(h)
        dim = (int(w * r), height)
    # otherwise, the height is None
    else:
        # calculate the ratio of the width and construct the dimensions
        r = width / float(w)
        dim = (width, int(h * r))
    # resize the image
    resized = cv2.resize(image, dim, interpolation=inter)
    # return the resized image
    return resized
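image_resize scales whichever dimension is supplied and keeps the aspect ratio. A quick worked example, assuming the 640*480 test image:

import numpy as np

# worked example for image_resize above, assuming the 640*480 test image
img = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in for cv2.imread(img_path, 1)
resized = image_resize(img, height=800)
# r = 800 / 480.0 ~ 1.667, dim = (int(640 * r), 800) = (1066, 800)
print(resized.shape)  # (800, 1066, 3)

The RetinaNet helper reported (800, 1067, 3) above for the same image; the one-pixel difference in width is most likely just a rounding versus truncation difference in how the scaled dimension is computed.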

Output

('Label', 'person', ' at ', array([590, 368, 649, 501]), ' Score ', 0.8739606)
('Label', 'person', ' at ', array([457, 362, 503, 504]), ' Score ', 0.8183074)
('Label', 'person', ' at ', array([532, 352, 568, 445]), ' Score ', 0.804338)
('Label', 'person', ' at ', array([500, 354, 535, 439]), ' Score ', 0.5898186)
('Label', 'dog', ' at ', array([375, 455, 394, 490]), ' Score ', 0.46877873)
('Label', 'person', ' at ', array([897, 335, 916, 371]), ' Score ', 0.43948865)
('Label', 'person', ' at ', array([926, 335, 943, 375]), ' Score ', 0.4195389)
('Label', 'person', ' at ', array([529, 301, 544, 344]), ' Score ', 0.30523297)
('Time for ', 1, ' is ', 13.235145092010498)

(Comparison figures: detections with RGB switched vs. with RGB not switched.)

Load via TensorFlow

import tensorflow as tf
from timeit import default_timer as timer

def decode_image_tf_reader(img_path):
    ## Read the image via the TF helper
    img_raw = tf.read_file(img_path)
    start = timer()
    img_tensor = tf.image.decode_jpeg(img_raw, channels=0,
                                      dct_method="INTEGER_FAST")  # not much effect here; decode time ~30 ms
    print("img_tensor.shape=", img_tensor.shape)
    image = tf.cast(img_tensor, tf.float32)

    #image = tf.image.resize_images(img_tensor, [800, 1067])

    smallest_side = 480.0  # will lose some info
    height, width = tf.shape(image)[0], tf.shape(image)[1]
    height = tf.to_float(height)
    width = tf.to_float(width)

    scale = tf.cond(tf.greater(height, width),
                    lambda: smallest_side / width,
                    lambda: smallest_side / height)
    new_height = tf.to_int32(height * scale)
    new_width = tf.to_int32(width * scale)
    image = tf.image.resize_images(image, [new_height, new_width])
    #image = tf.image.resize_images(image, [800, 1200])
    # https://forums.fast.ai/t/how-is-vgg16-mean-calculated/4577/19
    VGG_MEAN = [123.68, 116.78, 103.94]  # This is R-G-B for Imagenet
    means = tf.reshape(tf.constant(VGG_MEAN), [1, 1, 3])
    image = image - means
    # swap to BGR
    image = tf.reverse(image, axis=[-1])
    # without the above preprocessing there is a missed detection and a change in scores
    image = tf.Session().run(image)
    #image = image[:, :, [2, 1, 0]]  # swap channel from RGB to BGR
    end = timer()
    print("decode time=", end - start)
    # ('decode time=', 0.032473087310791016)
    # ('Label', 'person', ' at ', array([ 0, 252, 306, 470]), ' Score ', 0.8479492)
    # ('Label', 'person', ' at ', array([241, 97, 435, 365]), ' Score ', 0.7183717)
    # ('Label', 'person', ' at ', array([429, 287, 635, 475]), ' Score ', 0.67711633)
    # ('Label', 'tie', ' at ', array([313, 199, 340, 311]), ' Score ', 0.5820301)
    return image, image
Output

('Number of test=', 1)
('img_tensor.shape=', TensorShape([Dimension(None), Dimension(None), Dimension(None)]))
2019-03-27 09:17:00.288596: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
('decode time=', 0.032473087310791016)
('in image shape', (480, 640, 3))
('Input shape=', (1, 480, 640, 3))
('in tf shape', (1, 480, 640, 3))
('result no', 0)
('boxes output', (1, 300, 4))
('scores output', (1, 300))
('labels output', (1, 300))
('Label', 'person', ' at ', array([ 0, 252, 306, 470]), ' Score ', 0.8479492)
('Label', 'person', ' at ', array([241, 97, 435, 365]), ' Score ', 0.7183717)
('Label', 'person', ' at ', array([429, 287, 635, 475]), ' Score ', 0.67711633)
('Label', 'tie', ' at ', array([313, 199, 340, 311]), ' Score ', 0.5820301)
('Time for ', 1, ' is ', 0.7225308418273926)
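Note that decode_image_tf_reader builds the decode/resize graph and creates a new tf.Session on every call, which adds overhead on top of the decode itself. A rough sketch, in the same TF 1.x style, of building the graph once with a filename placeholder and reusing a single session; the function name and structure here are illustrative, not from the gist:

import tensorflow as tf

def make_decode_graph(smallest_side=480.0):
    # build the decode/resize ops once, fed by a filename placeholder
    path_ph = tf.placeholder(tf.string, shape=[])
    img_raw = tf.read_file(path_ph)
    image = tf.cast(tf.image.decode_jpeg(img_raw, channels=3), tf.float32)
    height = tf.to_float(tf.shape(image)[0])
    width = tf.to_float(tf.shape(image)[1])
    scale = tf.cond(tf.greater(height, width),
                    lambda: smallest_side / width,
                    lambda: smallest_side / height)
    image = tf.image.resize_images(
        image, [tf.to_int32(height * scale), tf.to_int32(width * scale)])
    means = tf.reshape(tf.constant([123.68, 116.78, 103.94]), [1, 1, 3])
    image = tf.reverse(image - means, axis=[-1])  # mean subtraction, then RGB -> BGR
    return path_ph, image

path_ph, image_op = make_decode_graph()
sess = tf.Session()  # created once, reused for every image
decoded = sess.run(image_op, feed_dict={path_ph: "/path/to/img.jpg"})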

Load via Pillow

### Going to create image vector via PIL
start = timer()
image = np.asarray(Image.open(img_path).convert('RGB'))
image = image.astype('f')
end = timer()
print("decode time=", end - start)

Output

Without the preprocessing conversion done in the earlier loaders, one person object is missed:

# ('decode time=', 0.023459911346435547)
# it is not able to detect one person object
# ('Label', 'person', ' at ', array([  0, 258, 308, 473]), ' Score ', 0.8666027)
# ('Label', 'person', ' at ', array([239,  98, 427, 378]), ' Score ', 0.745802)
# ('Label', 'tie', ' at ', array([312, 199, 340, 314]), ' Score ', 0.7037207)
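The Pillow path above only converts to RGB and casts to float. Below is a minimal sketch of adding the Caffe-style step the RetinaNet helper applies (RGB to BGR plus ImageNet mean subtraction); the function name is illustrative, and treating this as exactly equivalent to read_image_bgr plus preprocess_image is an assumption:

import numpy as np
from PIL import Image
from timeit import default_timer as timer

def decode_image_pillow(img_path):
    start = timer()
    # read as RGB with Pillow, then convert to float32
    image = np.asarray(Image.open(img_path).convert('RGB')).astype('float32')
    # RGB -> BGR, to match what read_image_bgr returns
    image = image[..., ::-1]
    # ImageNet mean subtraction in BGR order (Caffe-style preprocessing)
    image = image - np.array([103.939, 116.779, 123.68], dtype='float32')
    end = timer()
    print("decode time=", end - start)
    return image, image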

How an increase in image size aids detection (and consumes more resources and time)

Original image 8400*6777

Detection by the RetinaNet loader (Pillow + image normalization)

('Number of test=', 1)
('decode time=', 2.9874989986419678)
('in image shape', (800, 992, 3))
('Input shape=', (1, 800, 992, 3))
('in tf shape', (1, 800, 992, 3))
('result no', 0)
('boxes output', (1, 300, 4))
('scores output', (1, 300))
('labels output', (1, 300))
('Label', 'person', ' at ', array([726, 234, 796, 424]), ' Score ', 0.939078)
('Label', 'person', ' at ', array([418, 232, 464, 425]), ' Score ', 0.9093691)
('Label', 'person', ' at ', array([521, 234, 578, 430]), ' Score ', 0.90926725)
('Label', 'person', ' at ', array([466, 670, 621, 798]), ' Score ', 0.89821684)
('Label', 'person', ' at ', array([ 44, 229, 112, 424]), ' Score ', 0.89342)
('Label', 'person', ' at ', array([607, 227, 703, 428]), ' Score ', 0.88139987)
('Label', 'person', ' at ', array([148, 458, 270, 664]), ' Score ', 0.88060725)
('Label', 'person', ' at ', array([885, 233, 987, 401]), ' Score ', 0.87992036)
('Label', 'person', ' at ', array([742, 454, 865, 665]), ' Score ', 0.87926996)
('Label', 'person', ' at ', array([482, 8, 612, 219]), ' Score ', 0.8729649)
('Label', 'person', ' at ', array([171, 241, 246, 430]), ' Score ', 0.859805)
('Label', 'person', ' at ', array([ 7, 2, 129, 218]), ' Score ', 0.851859)
('Label', 'person', ' at ', array([868, 454, 965, 662]), ' Score ', 0.8461512)
('Label', 'person', ' at ', array([ 9, 450, 126, 665]), ' Score ', 0.8399973)
('Label', 'person', ' at ', array([823, 234, 870, 417]), ' Score ', 0.8194807)
('Label', 'person', ' at ', array([745, 20, 861, 216]), ' Score ', 0.8122773)
('Label', 'person', ' at ', array([605, 457, 725, 665]), ' Score ', 0.8076674)
('Label', 'person', ' at ', array([289, 444, 414, 665]), ' Score ', 0.79930824)
('Label', 'person', ' at ', array([133, 8, 254, 215]), ' Score ', 0.7968467)
('Label', 'person', ' at ', array([619, 7, 742, 216]), ' Score ', 0.7953634)
('Label', 'person', ' at ', array([619, 680, 787, 799]), ' Score ', 0.7879201)
('Label', 'person', ' at ', array([251, 3, 351, 223]), ' Score ', 0.78193235)
('Label', 'person', ' at ', array([189, 687, 297, 794]), ' Score ', 0.7808079)
('Label', 'person', ' at ', array([783, 664, 971, 794]), ' Score ', 0.7797159)
('Label', 'person', ' at ', array([449, 453, 553, 668]), ' Score ', 0.7750649)
('Label', 'person', ' at ', array([301, 252, 345, 432]), ' Score ', 0.77126116)
('Label', 'person', ' at ', array([365, 8, 471, 212]), ' Score ', 0.7604503)
('Label', 'cell phone', ' at ', array([697, 668, 747, 706]), ' Score ', 0.70097476)
('Label', 'person', ' at ', array([871, 8, 985, 218]), ' Score ', 0.6951452)
('Label', 'person', ' at ', array([363, 680, 477, 785]), ' Score ', 0.6740273)
('Label', 'person', ' at ', array([ 47, 676, 148, 793]), ' Score ', 0.6373141)
('Time for ', 1, ' is ', 2.5029850006103516)

Via TensorFlow

At resolution 800*1600:

img_tensor = tf.image.decode_jpeg(img_raw, channels=0, dct_method="INTEGER_FAST")  # not much effect here; decode time ~30 ms
print("img_tensor.shape=", img_tensor.shape)
image = tf.cast(img_tensor, tf.float32)
#image = tf.image.resize_images(img_tensor, [800, 1067])
image = tf.image.resize_images(image, [800, 1600])
VGG_MEAN = [123.68, 116.78, 103.94]  # This is R-G-B for Imagenet
#VGG_MEAN = [103.94, 116.78, 123.68]
means = tf.reshape(tf.constant(VGG_MEAN), [1, 1, 3])
image = image - means
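Note that the fixed [800, 1600] resize does not preserve the aspect ratio of the 8400*6777 image; a quick check of the distortion:

# tf.image.resize_images takes [height, width]; the original is 8400 wide x 6777 high
orig_w, orig_h = 8400, 6777
new_h, new_w = 800, 1600
print(orig_w / float(orig_h))  # ~1.24, original width/height ratio
print(new_w / float(new_h))    # 2.0, resized ratio, so objects get stretched horizontally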

Via OpenCV

Detections at resolution (800, 1067):

('Number of test=', 1)
('decode time=', 0.6910791397094727)
('in image shape', (800, 1067, 3))
('Input shape=', (1, 800, 1067, 3))
('in tf shape', (1, 800, 1067, 3))
('result no', 0)
('boxes output', (1, 300, 4))
('scores output', (1, 300))
('labels output', (1, 300))
('Label', 'person', ' at ', array([507, 667, 671, 798]), ' Score ', 0.891104)
('Label', 'person', ' at ', array([565, 238, 621, 428]), ' Score ', 0.8876671)
('Label', 'person', ' at ', array([186, 243, 264, 429]), ' Score ', 0.8755632)
('Label', 'person', ' at ', array([794, 450, 929, 669]), ' Score ', 0.86985874)
('Label', 'person', ' at ', array([311, 444, 448, 662]), ' Score ', 0.8531823)
('Label', 'person', ' at ', array([270, 7, 380, 214]), ' Score ', 0.8526345)
('Label', 'person', ' at ', array([779, 230, 855, 422]), ' Score ', 0.8257129)
('Label', 'person', ' at ', array([450, 232, 496, 423]), ' Score ', 0.81420606)
('Label', 'person', ' at ', array([524, 10, 658, 217]), ' Score ', 0.7964738)
('Label', 'person', ' at ', array([ 844, 663, 1003, 796]), ' Score ', 0.7255119)
('Label', 'person', ' at ', array([409, 8, 514, 201]), ' Score ', 0.7211455)
('Label', 'person', ' at ', array([172, 452, 284, 673]), ' Score ', 0.71296626)
('Label', 'person', ' at ', array([ 942, 8, 1058, 219]), ' Score ', 0.67450273)
('Label', 'person', ' at ', array([643, 452, 768, 654]), ' Score ', 0.6541213)
('Label', 'person', ' at ', array([325, 245, 368, 430]), ' Score ', 0.60505646)
('Label', 'person', ' at ', array([673, 8, 790, 218]), ' Score ', 0.5901652)
('Label', 'person', ' at ', array([209, 686, 320, 792]), ' Score ', 0.56278116)
('Label', 'person', ' at ', array([ 957, 235, 1015, 401]), ' Score ', 0.5500602)
('Label', 'person', ' at ', array([804, 16, 925, 283]), ' Score ', 0.53584856)
('Label', 'person', ' at ', array([ 942, 457, 1026, 650]), ' Score ', 0.5299792)
('Label', 'person', ' at ', array([485, 450, 591, 667]), ' Score ', 0.515951)
('Time for ', 1, ' is ', 1.6651248931884766)
Response Received Exiting

Full code — https://gist.github.com/alexcpn/c43e8980d32348fce1fd04f1a52d5ea7
