Video Classification Based On Action (from scratch & without GPU support)

Sujoy Kumar Goswami
Analytics Vidhya
Published in
4 min read · Apr 16, 2020

NO GPU!! NO HEAVY EXTERNAL DATA-SET!! Read on to learn and implement a basic video classification technique, based on temporal action, on any machine.

Here I shall create my own video data, in which a rectangle moves across the frame. The sample code (use Jupyter Notebook) is below:

import numpy as np
import skvideo.io as sk

# creating sample video data
num_vids = 5    # number of video files to write
num_imgs = 100  # frames per video
img_size = 50   # frames are img_size x img_size pixels
min_object_size = 1
max_object_size = 5

for i_vid in range(num_vids):
    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
    vid_name = 'vid' + str(i_vid) + '.mp4'
    # random rectangle size (randint's upper bound is exclusive) and start position
    w, h = np.random.randint(min_object_size, max_object_size, size=2)
    x = np.random.randint(0, img_size - w)
    y = np.random.randint(0, img_size - h)
    i_img = 0
    # move the rectangle one pixel to the left per frame until it reaches the
    # left border; the remaining frames stay black. The frame index is bounded
    # so that a small num_imgs can never cause an out-of-range write.
    while x > 0 and i_img < num_imgs:
        imgs[i_img, y:y+h, x:x+w] = 255  # set rectangle as foreground
        x = x - 1
        i_img = i_img + 1
    sk.vwrite(vid_name, imgs.astype(np.uint8))  # one .mp4 file per video
# play a video
from IPython.display import Video

Video("vid3.mp4")  # the script & video should be in same folder

Now I shall create 4 different types of videos, in which a rectangle moves in one of 4 directions: left, right, up or down. Accordingly, there will be 4 classes, which I shall classify from these video data using Deep Learning. Go through the code below (written with Python 3.6.9 and Keras 2.2.4 in a Jupyter Notebook), and be sure to read the comments.

import numpy as np

# preparing dataset
labels = list(enumerate(['left', 'right', 'up', 'down']))  # 4 classes as (class id, name) pairs
num_vids = 30  # videos per class
num_imgs = 30  # frames per video
img_size = 20  # frames are img_size x img_size pixels
min_object_size = 1
max_object_size = 5

# per-frame displacement (dx, dy) of the rectangle for each class;
# x indexes columns and y indexes rows of a frame
STEPS = {'left': (-1, 0), 'right': (1, 0), 'up': (0, -1), 'down': (0, 1)}


def make_video(direction, num_imgs, img_size, min_object_size, max_object_size):
    """Return one synthetic clip of a rectangle moving in `direction`.

    The clip is a float array of shape (num_imgs, img_size, img_size) with
    background 0 and rectangle pixels 255. The rectangle gets a random size
    and start position and is shifted by one pixel per frame. For 'left' and
    'up' the motion stops once the coordinate reaches 0; for 'right' and
    'down' it stops once the coordinate reaches img_size (the rectangle has
    slid out of the frame). All remaining frames stay black.

    NOTE: a 'left'/'up' clip whose start coordinate is drawn as 0 contains
    no rectangle at all, exactly as in the original loops.
    """
    dx, dy = STEPS[direction]
    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
    # randint's upper bound is exclusive
    w, h = np.random.randint(min_object_size, max_object_size, size=2)
    x = np.random.randint(0, img_size - w)
    y = np.random.randint(0, img_size - h)
    # iterating over the frame indices guarantees we never write past the
    # last frame, even when num_imgs is smaller than the distance travelled
    for i_img in range(num_imgs):
        pos = x if dx else y  # coordinate along the direction of travel
        if (dx + dy < 0 and pos <= 0) or (dx + dy > 0 and pos >= img_size):
            break
        imgs[i_img, y:y+h, x:x+w] = 255  # set rectangle as foreground
        x += dx
        y += dy
    return imgs


def make_dataset(num_vids, num_imgs, img_size, min_object_size, max_object_size):
    """Return (videos, class_ids) with `num_vids` clips for each of the 4 classes.

    Exactly one class id is appended per video, so both lists always have the
    same length regardless of how num_vids and num_imgs relate to each other.
    """
    videos, class_ids = [], []
    for class_id, direction in labels:
        for _ in range(num_vids):
            videos.append(make_video(direction, num_imgs, img_size,
                                     min_object_size, max_object_size))
            class_ids.append(class_id)
    return videos, class_ids


# videos are ordered by class: left (0), right (1), up (2), down (3)
X_train, Y_train = make_dataset(num_vids, num_imgs, img_size,
                                min_object_size, max_object_size)
# data pre-processing
from keras.utils import to_categorical

num_classes = 4  # left, right, up, down

# scale the pixel values (0 or 255) to 0 or 1 and add a trailing channel axis
X_train = np.array(X_train, dtype=np.float32) / 255
X_train = X_train.reshape(X_train.shape[0], num_imgs, img_size, img_size, 1)
print(X_train.shape)

# one integer class id per video, as a column vector
Y_train = np.array(Y_train, dtype=np.uint8)
Y_train = Y_train.reshape(X_train.shape[0], 1)
print(Y_train.shape)
Y_train = to_categorical(Y_train, num_classes)  # one-hot encode the class ids

(120, 30, 20, 20, 1)
(120, 1)

# building model
from keras.models import Sequential
from keras.layers import (Conv2D, Dense, Dropout, Flatten, LSTM,
                          MaxPooling2D, TimeDistributed)

model = Sequential()
# TimeDistributed applies the wrapped layer to every frame of a video, so the
# time axis is kept and the temporal information reaches the LSTM below
model.add(TimeDistributed(Conv2D(8, (3, 3), strides=(1, 1), activation='relu', padding='same'),
                          input_shape=(num_imgs, img_size, img_size, 1)))
model.add(TimeDistributed(Conv2D(8, (3, 3), kernel_initializer='he_normal', activation='relu')))
# NOTE: a 1x1 pool with stride 1 passes the feature maps through unchanged;
# it is kept so the architecture matches the original
model.add(TimeDistributed(MaxPooling2D((1, 1), strides=(1, 1))))
model.add(TimeDistributed(Flatten()))  # one feature vector per frame
model.add(Dropout(0.3))
# return_sequences=False: only the output after the last frame is passed on
model.add(LSTM(64, return_sequences=False, dropout=0.3))
model.add(Dense(4, activation='softmax'))  # one output per class
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()
# model training
# `epochs` is the Keras 2 name of the argument formerly called `nb_epoch`
model.fit(X_train, Y_train, epochs=50, verbose=1)
# model testing with new data (4 videos)
X_test = []
Y_test = []  # expected class ids, in the same order as X_test

# 2nd class - 'right' (class id 1)
for i_vid in range(2):
    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
    w, h = np.random.randint(min_object_size, max_object_size, size=2)
    x = np.random.randint(0, img_size - w)
    y = np.random.randint(0, img_size - h)
    i_img = 0
    # move right by one pixel per frame until the rectangle has left the
    # frame; the frame index is bounded so it can never run past num_imgs
    while x < img_size and i_img < num_imgs:
        imgs[i_img, y:y+h, x:x+w] = 255  # set rectangle as foreground
        x = x + 1
        i_img = i_img + 1
    X_test.append(imgs)
    Y_test.append(1)

# 4th class - 'down' (class id 3)
for i_vid in range(2):
    imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
    w, h = np.random.randint(min_object_size, max_object_size, size=2)
    x = np.random.randint(0, img_size - w)
    y = np.random.randint(0, img_size - h)
    i_img = 0
    # move down by one pixel per frame until the rectangle has left the frame
    while y < img_size and i_img < num_imgs:
        imgs[i_img, y:y+h, x:x+w] = 255  # set rectangle as foreground
        y = y + 1
        i_img = i_img + 1
    X_test.append(imgs)
    Y_test.append(3)
# same pre-processing as for the training data: scale the pixel values to
# 0 or 1 and add a trailing channel axis
X_test = np.array(X_test, dtype=np.float32) / 255
X_test = X_test.reshape(X_test.shape[0], num_imgs, img_size, img_size, 1)
# predicted class id = index of the largest softmax output; this replaces
# Sequential.predict_classes, which is not available in newer Keras releases
pred = np.argmax(model.predict(X_test), axis=-1)
pred

array([1, 1, 3, 3], dtype=int64)

Here all 4 test videos are classified correctly: the first two as class 1 ('right') and the last two as class 3 ('down').

Thanks for reading. Also go through my very first related post here.

--

--