Auto Labeling Is Not a Dream!

ChengKang Tan
6 min readFeb 6, 2024

--

The AI generation is coming, and we need to collect more data to train or improve our models. However, all of this data needs ground truth. For tasks like segmentation or object detection, we have to manually draw a bounding box or trace an outline around every target object, which is very tedious and time-consuming.

[⭐Update⭐] Another project!

  • Using this auto-labeling technique in an annotation app!

Now, we can use YOLO V8 to automate all of these steps!

Let’s start!

First, you need to install the Ultralytics package:

pip install ultralytics

If you already have YOLO V8 but it’s an older version, you can use the following command to upgrade it using pip:

pip install ultralytics --upgrade

Import libraries

import os

import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

Let’s see which classes the pretrained YOLO model can detect (run this after the model has been loaded, as shown further below).

# `model` is the YOLO object created with YOLO("yolov8n-seg.pt") further down; load it first.
names = model.model.names  # mapping from class id to class label (printed below)
print(names)
{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 
5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light',
10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench',
14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow',
20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack',
25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee',
30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite',
34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard',
38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork',
43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple',
48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog',
53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch',
58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv',
63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone',
68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator',
73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear',
78: 'hair drier', 79: 'toothbrush'}

YOLO V8 comes in five model sizes (n, s, m, l, x), listed with the smallest one at the top and the largest one at the bottom.

For this exercise, I will use the smallest model, YOLOv8n.

https://github.com/ultralytics/ultralytics
# Load the YOLOv8n "-seg" weights file; the rest of the tutorial uses this model's masks.
model = YOLO("yolov8n-seg.pt")
model.to('cuda:0') # to gpu 0

Load video

# Open the input video; frames are read from `cap` one at a time in the prediction loop.
cap = cv2.VideoCapture("cars.mp4")

In this exercise, I used this video. You can also download it from the link below.

[ Link ]

Resize the video

# Target frame size in pixels; every frame is passed through cv2.resize with these values.
output_width = 1080
output_height = 600

Choose the object you want to segment.

# Labels listed here are compared against the model's class names, so spell them the same way.
objects_of_interest = ['car'] #<- which object do you want to segment

# One counter per selected label, starting at zero.
object_counts = {obj: 0 for obj in objects_of_interest}

Predict

while True:
    # Grab the next frame; `ret` is False once no more frames can be read.
    ret, frame = cap.read()
    if not ret:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    frame = cv2.resize(frame, (output_width, output_height))

    # Start every frame from zero so the on-screen count reflects the objects
    # detected in the current frame instead of a running total over the video.
    for obj in object_counts:
        object_counts[obj] = 0

    # conf => confidence score
    results = model.predict(frame, device='0', conf=0.25)

Data annotator

# `results` is what model.predict() returned for the current frame (YOLOv8 output).
# NOTE: this snippet continues the body of the `while` loop shown earlier.

if results[0].masks is not None:
    # Class id of every detection, as plain Python floats.
    clss = results[0].boxes.cls.cpu().tolist()
    # One mask outline per detection, in the same order as `clss`.
    masks = results[0].masks.xy

    annotator = Annotator(frame, line_width=2)

    for mask, cls in zip(masks, clss):
        det_label = names[int(cls)]

        # if det_label is the object we want to detect
        if det_label in objects_of_interest:

            # if you don't have seg_bbox function,
            # then you need to upgrade your Yolo v8.
            annotator.seg_bbox(mask=mask,
                               mask_color=colors(int(cls), True),
                               det_label=det_label)

            # Count objects
            object_counts[det_label] += 1

What values are in the result?

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: ultralytics.engine.results.Masks object
names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71:
'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}
obb: None
orig_img: array([[[52, 48, 38],
[55, 51, 41],
[60, 54, 44],
...,
[29, 26, 25],
[29, 26, 25],
[30, 27, 26]],

[[58, 54, 44],
[56, 52, 42],
[59, 53, 43],
...,
[32, 29, 28],
[32, 29, 28],
[32, 29, 28]],

[[54, 50, 40],
[54, 50, 40],
[60, 54, 44],
...,
[30, 27, 26],
[30, 27, 26],
[30, 27, 26]],

...,

[[39, 30, 34],
[40, 31, 35],
[40, 32, 34],
...,
[52, 42, 42],
[52, 43, 40],
[52, 43, 40]],

[[36, 30, 33],
[37, 31, 34],
[41, 36, 37],
...,
[49, 41, 43],
[51, 41, 41],
[51, 41, 41]],

[[37, 32, 31],
[35, 30, 29],
[34, 29, 28],
...,
[50, 42, 44],
[52, 42, 42],
[52, 42, 42]]], dtype=uint8)
orig_shape: (600, 1080)
path: 'image0.jpg'
probs: None
save_dir: None
speed: {'preprocess': 1.0001659393310547, 'inference': 4.999876022338867, 'postprocess': 0.9996891021728516}]

Object count + visualize

text_y = 30
for obj, count in object_counts.items():
cv2.putText(frame, f"{obj.capitalize()} Count: {count}", (10, text_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
text_y += 40

# out.write(frame)
cv2.imshow("instance-segmentation", frame)

if cv2.waitKey(1) & 0xFF == ord('q'):
break

# out.release()
# After the loop: release the video source and close the preview window.
cap.release()
cv2.destroyAllWindows()

Full code

import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors
import os

# Load the YOLOv8n "-seg" weights and move the model to the GPU.
model = YOLO("yolov8n-seg.pt")
model.to('cuda')
names = model.model.names  # mapping from class id to class label

cap = cv2.VideoCapture("cars.mp4")
if not cap.isOpened():
    # Fail early with a clear message instead of silently reading zero frames.
    raise SystemExit("Could not open video file: cars.mp4")

output_folder = "output_instances"
os.makedirs(output_folder, exist_ok=True)

# Every frame is resized to this size before prediction and display.
output_width = 1080
output_height = 600

# Optional video export. The writer's frame size must match the frames that
# are written, i.e. the resized frames, not the source video's size.
# out = cv2.VideoWriter('instance-segmentation.avi',
#                       cv2.VideoWriter_fourcc(*'MJPG'),
#                       30, (output_width, output_height))

objects_of_interest = ['car']

# One counter per selected label; reset at the start of every frame below.
object_counts = {obj: 0 for obj in objects_of_interest}

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        frame = cv2.resize(frame, (output_width, output_height))

        # Start every frame from zero so the on-screen count reflects the
        # objects detected in the current frame, not a running total.
        for obj in object_counts:
            object_counts[obj] = 0

        results = model.predict(frame, device='0', conf=0.25)
        if results[0].masks is not None:
            clss = results[0].boxes.cls.cpu().tolist()
            masks = results[0].masks.xy

            annotator = Annotator(frame, line_width=2)

            # `idx` is only needed by the optional per-instance export below.
            for idx, (mask, cls) in enumerate(zip(masks, clss)):
                det_label = names[int(cls)]
                if det_label not in objects_of_interest:
                    continue

                annotator.seg_bbox(mask=mask,
                                   mask_color=colors(int(cls), True),
                                   det_label=det_label)

                object_counts[det_label] += 1

                # Save each instance segmented object
                # instance_folder = os.path.join(output_folder, det_label)
                # os.makedirs(instance_folder, exist_ok=True)
                # instance_path = os.path.join(instance_folder, f"{det_label}_{idx}.png")
                # cv2.imwrite(instance_path, frame)

        # Write one "<Label> Count: <n>" line per tracked label, 40 px apart.
        text_y = 30
        for obj, count in object_counts.items():
            cv2.putText(frame, f"{obj.capitalize()} Count: {count}", (10, text_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
            text_y += 40

        # out.write(frame)
        cv2.imshow("instance-segmentation", frame)

        # Press 'q' in the preview window to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture and close the window, even if a frame fails.
    # out.release()
    cap.release()
    cv2.destroyAllWindows()

⭐If you want to save the segmentation results (the annotated video and the per-instance images), uncomment the commented-out lines in the code above.

If you just want a quick on-screen demonstration of the segmentation, leave those lines commented out and run the code as is.⭐

The counting method is still simplistic (it does not track unique objects across frames) and needs to be revised later.

Thanks!

--

--

ChengKang Tan

NCKU_CSIE 💻Master print(" I want to share and record my knowledge through this website.") 🌌