Placing brand logos on T-shirts using Computer Vision
Introduction
This article explores the use of simple and freely available computer vision models to place brand logos on the t-shirts of the foreground person in a given image. We can use this as a guide for placing brand logos on any merchandise.
Overview
For this particular use case, placing brand logos on the T-shirt of the foreground person in a given image, the methodology is as follows:
- Detect people in the given image using YOLOv8n.
- Identify the foreground person.
- Detect the shirt using a garment segmentation model (mattmdjaga/segformer_b2_clothes).
- Since a person can be standing in a variety of poses, we perform pose estimation for the foreground person using OpenPose.
- Use the shoulders detected by pose estimation to determine where the logo must be placed.
- Add the logo to the original image.
Models
- YOLOv8n (https://github.com/ultralytics/ultralytics)
- SegFormer (https://huggingface.co/mattmdjaga/segformer_b2_clothes)
- OpenPose (https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/models/pose/coco/pose_deploy_linevec.prototxt)
Technical implementation
import cv2
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from ultralytics import YOLO
import torch.nn as nn
from transformers import SegformerImageProcessor, AutoModelForSemanticSegmentation
After importing required modules, we can load the different models required. This includes a YOLO model for detecting people in an image, a SegFormer model finetuned for garments and a pose estimation model.
# Person detector: official YOLOv8n checkpoint
model = YOLO('model/yolov8n.pt')

# Garment segmentation: image processor and model from the same checkpoint
processor = SegformerImageProcessor.from_pretrained(
    "mattmdjaga/segformer_b2_clothes"
)
model_cloth = AutoModelForSemanticSegmentation.from_pretrained(
    "mattmdjaga/segformer_b2_clothes"
)
# Integer class id -> label name
label_map_cloth = {
    0: "Background",
    1: "Hat",
    2: "Hair",
    3: "Sunglasses",
    4: "Upper-clothes",
    5: "Skirt",
    6: "Pants",
    7: "Dress",
    8: "Belt",
    9: "Left-shoe",
    10: "Right-shoe",
    11: "Face",
    12: "Left-leg",
    13: "Right-leg",
    14: "Left-arm",
    15: "Right-arm",
    16: "Bag",
    17: "Scarf",
}

### Pose estimation (COCO): network definition, weights and keypoint layout
protoFile = "model/pose/coco/pose_deploy_linevec.prototxt"
weightsFile = "model/pose/coco/pose_iter_440000.caffemodel"
nPoints = 18
POSE_PAIRS = [
    [1, 0], [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8],
    [8, 9], [9, 10], [1, 11], [11, 12], [12, 13], [0, 14], [0, 15],
    [14, 16], [15, 17],
]
# Build the Caffe network from the definition and weight files
net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
With the following code, we can visualize the logo we want to place on the T-shirts.
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable, so fail early with a clear message instead of inside cvtColor.
logo = cv2.imread('images/logo.jpg')
if logo is None:
    raise FileNotFoundError("Could not read logo image 'images/logo.jpg'")
# OpenCV loads images as BGR; convert to RGB for display with matplotlib.
logo = cv2.cvtColor(logo, cv2.COLOR_BGR2RGB)
plt.imshow(logo)
plt.axis('off')
plt.show()
Now we will understand the different helper functions.
def modify_logo_size(logo_copy, w, h, scale=0.03):
    '''Resize the logo to a square sized relative to the target image.

    The side length is ``scale`` times the shorter of ``w`` and ``h``,
    truncated to an integer and never smaller than one pixel.

    Args:
        logo_copy: logo image to resize (passed to ``cv2.resize``).
        w: width of the image the logo will be placed on.
        h: height of the image the logo will be placed on.
        scale: fraction of the shorter image side used as the logo side.

    Returns:
        The resized, square logo image.
    '''
    # Clamp to 1 px: on small images int(scale * side) truncates to 0 and
    # cv2.resize rejects an empty destination size.
    side = max(1, int(scale * min(w, h)))
    return cv2.resize(logo_copy, (side, side), interpolation=cv2.INTER_LINEAR)
def find_logo_placement(shirt):
    '''
    Choose the logo position for a 2-D shirt array.

    The horizontal position starts at the middle column and is pushed
    further along by an amount that depends on the height/width ratio;
    the vertical position is a quarter of the height, divided by that
    ratio when the ratio is at most 0.8. The constants are empirical and
    can be tuned to taste.

    Returns the two positions truncated to int, horizontal first.
    '''
    n_rows, n_cols = np.shape(shirt)
    aspect = n_rows / n_cols
    centre_x = n_cols / 2
    if aspect > 0.8:
        return int(centre_x + (centre_x / 3) * aspect), int(n_rows / 4)
    return int(centre_x + (centre_x / 5.5) / aspect), int(n_rows / 4 / aspect)
def find_logo_placement_pose(pose_information):
    '''
    Derive the logo position from the first two pose entries.

    Each of ``pose_information[0]`` and ``pose_information[1]`` is read
    as (something, x, y); only the trailing x, y pair is used. The
    horizontal position is the midpoint of the two x values shifted by a
    quarter (floor division) of their difference; the vertical position
    is the second entry's y plus 10.

    Returns both positions truncated to int, horizontal first.
    '''
    first_x, _first_y = pose_information[0][1:]
    second_x, second_y = pose_information[1][1:]
    quarter_span = (second_x - first_x) // 4
    x_pos = np.mean([first_x, second_x]) + quarter_span
    return int(x_pos), int(second_y + 10)
With the help of these helper functions we can write a function to perform logo placement.
### Read image
# NOTE(review): image_name is not defined in this snippet; presumably it is
# the image path supplied by the enclosing function or caller. Confirm.
image = cv2.imread(image_name)
# cv2.imread returns None (it does not raise) on a missing or unreadable file.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_name}")
# OpenCV loads images as BGR; the rest of the pipeline works on RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
### Show input image
plt.imshow(image)
plt.axis('off')
plt.show()
## Run YOLO on image to detect person
results = model(image_name)  # predict on an image
for idx, result in enumerate(results):
    boxes = result.boxes  # Boxes object for bounding box outputs
    names = result.names  # label map

### To find the foreground person
boxes_person = []
for b in boxes:
    # .cpu() is a no-op on CPU tensors and keeps .numpy() working when
    # inference ran on a GPU.
    bcls = int(b.cls.cpu().numpy()[0])
    label = names[bcls]
    if label != 'person':
        continue
    ### .conf gives us the probability of detection
    bconf = b.conf.cpu().numpy()[0]
    # xyxy holds corner coordinates (left, top, right, bottom), so w and h
    # here are the right and bottom edges, not a width and a height.
    x, y, w, h = b.xyxy.cpu().numpy()[0]
    boxes_person.append([bconf, x, y, w, h])

# Without this guard an image with no people fails below with an opaque
# IndexError on the empty array.
if not boxes_person:
    raise ValueError(f"No person detected in {image_name}")

boxes_person = np.array(boxes_person)
## the most confident person detection is treated as the foreground person
max_index = np.argmax(boxes_person[:, 0])
foreground_person = boxes_person[max_index].astype(int)
x, y, w, h = foreground_person[1:]
# Crop with corner coordinates on both axes. The previous y:y+h added the
# bottom edge to the top edge and extended the crop below the person.
roi = image[y:h, x:w]

### Show foreground person detection boundaries
plt.imshow(roi)
plt.axis('off')
plt.show()
## detect clothes in the foreground person
# Preprocess the cropped person into PyTorch tensors and run the garment
# segmentation model on it.
inputs = processor(images=roi, return_tensors="pt")
outputs = model_cloth(**inputs)
# The pose network is run on the same cropped region.
frame = roi
frameCopy = np.copy(frame)
frameWidth = frame.shape[1]
frameHeight = frame.shape[0]
# NOTE(review): threshold is not used in this snippet; presumably a keypoint
# confidence cut-off applied further down. Confirm.
threshold = 0.1
# Spatial size the crop is resized to when building the network input blob.
inWidth = 368
inHeight = 368
# Scale pixel values by 1/255, subtract no mean, resize without cropping.
# NOTE(review): frame is cut from an image that was converted to RGB, yet
# swapRB=False passes the channels through unchanged. If the Caffe pose
# model expects BGR input, swapRB=True would be needed. Confirm.
inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
(0, 0, 0), swapRB=False, crop=False)
## Pose estimation
# Feed the blob to the pose network and run a forward pass.
net.setInput(inpBlob)
output = net.forward()