
Live Face Tracking on iOS using Vision Framework

What is Vision Framework?

Vision is Apple's computer vision framework, available since iOS 11. It can detect faces and facial landmarks, text, barcodes, rectangles and more in images and video, and it runs entirely on-device.

Why use Vision Framework?

For face tracking, Vision does the heavy lifting for us: we don't have to source, train or bundle our own machine learning model, the detection happens on-device, and the framework works directly with the CVPixelBuffer frames that AVFoundation's camera output delivers.

Getting Started

In this tutorial we will build a simple app that tracks faces on the live front camera feed. We will do this in two steps:

  1. Stream the front camera feed onto the screen
  2. Detect faces and draw bounding boxes on screen

1. Streaming the front camera feed onto the screen

import AVFoundation

// Keep a reference to the capture session for the lifetime of the view controller
private let captureSession = AVCaptureSession()

private func addCameraInput() {
    guard let device = AVCaptureDevice.DiscoverySession(
        deviceTypes: [.builtInWideAngleCamera, .builtInDualCamera, .builtInTrueDepthCamera],
        mediaType: .video,
        position: .front).devices.first else {
            fatalError("No front camera device found, please make sure to run the app on an iOS device and not a simulator")
    }
    let cameraInput = try! AVCaptureDeviceInput(device: device)
    self.captureSession.addInput(cameraInput)
}

// Call addCameraInput() from viewDidLoad()
self.addCameraInput()
Info.plist file: add the "Privacy - Camera Usage Description" (NSCameraUsageDescription) entry with a short message explaining why the app needs the camera, otherwise the app will be terminated when it tries to access it.
private lazy var previewLayer = AVCaptureVideoPreviewLayer(session: self.captureSession)

private func showCameraFeed() {
    self.previewLayer.videoGravity = .resizeAspectFill
    self.view.layer.addSublayer(self.previewLayer)
    self.previewLayer.frame = self.view.frame
}

// Call showCameraFeed() from viewDidLoad(), after addCameraInput()
self.showCameraFeed()

// Keep the preview layer in sync with the view's size
override func viewDidLayoutSubviews() {
    super.viewDidLayoutSubviews()
    self.previewLayer.frame = self.view.frame
}

// Finally, start the capture session at the end of viewDidLoad()
self.captureSession.startRunning()
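Putting step 1 together: all of the snippets above live in the app's single view controller. Below is a minimal sketch of how they could be wired up, assuming a plain UIKit view controller; the wiring in viewDidLoad is mine, while addCameraInput() and showCameraFeed() are the helpers defined above.

import UIKit
import AVFoundation

class ViewController: UIViewController {

    private let captureSession = AVCaptureSession()
    private lazy var previewLayer = AVCaptureVideoPreviewLayer(session: self.captureSession)

    override func viewDidLoad() {
        super.viewDidLoad()
        self.addCameraInput()              // feed the session from the front camera
        self.showCameraFeed()              // render the camera feed on screen
        self.captureSession.startRunning() // start streaming frames
    }

    override func viewDidLayoutSubviews() {
        super.viewDidLayoutSubviews()
        self.previewLayer.frame = self.view.frame
    }

    // addCameraInput() and showCameraFeed() as defined above
}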

2. Detect faces and draw bounding boxes on screen

private let videoDataOutput = AVCaptureVideoDataOutput()

private func getCameraFrames() {
    self.videoDataOutput.videoSettings = [(kCVPixelBufferPixelFormatTypeKey as NSString): NSNumber(value: kCVPixelFormatType_32BGRA)] as [String: Any]
    self.videoDataOutput.alwaysDiscardsLateVideoFrames = true
    self.videoDataOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "camera_frame_processing_queue"))
    self.captureSession.addOutput(self.videoDataOutput)
    guard let connection = self.videoDataOutput.connection(with: AVMediaType.video),
        connection.isVideoOrientationSupported else { return }
    connection.videoOrientation = .portrait
}
// The view controller must conform to AVCaptureVideoDataOutputSampleBufferDelegate
// in order to receive the camera frames
class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
    ....

    func captureOutput(
        _ output: AVCaptureOutput,
        didOutput sampleBuffer: CMSampleBuffer,
        from connection: AVCaptureConnection) {
        print("did receive frame")
    }
}

// Call getCameraFrames() from viewDidLoad(), before starting the capture session
self.getCameraFrames()
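With this change, the viewDidLoad sketched in step 1 gains one extra call; a sketch under the same assumptions as before, with getCameraFrames() placed before startRunning() so the video output is already attached when the session starts:

override func viewDidLoad() {
    super.viewDidLoad()
    self.addCameraInput()
    self.showCameraFeed()
    self.getCameraFrames()             // attach the video data output and its delegate
    self.captureSession.startRunning()
}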
Console log: "did receive frame" is printed repeatedly, once for every frame delivered by the camera.
import Vision

private func detectFace(in image: CVPixelBuffer) {
    let faceDetectionRequest = VNDetectFaceLandmarksRequest(completionHandler: { (request: VNRequest, error: Error?) in
        DispatchQueue.main.async {
            if let results = request.results as? [VNFaceObservation], results.count > 0 {
                print("did detect \(results.count) face(s)")
            } else {
                print("did not detect any face")
            }
        }
    })
    let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: image, orientation: .leftMirrored, options: [:])
    try? imageRequestHandler.perform([faceDetectionRequest])
}
func captureOutput(
    _ output: AVCaptureOutput,
    didOutput sampleBuffer: CMSampleBuffer,
    from connection: AVCaptureConnection) {
    guard let frame = CMSampleBufferGetImageBuffer(sampleBuffer) else {
        debugPrint("unable to get image from sample buffer")
        return
    }
    self.detectFace(in: frame)
}
Console log: "did detect 1 face(s)" or "did not detect any face" is printed as faces enter and leave the frame.
private var drawings: [CAShapeLayer] = []

private func handleFaceDetectionResults(_ observedFaces: [VNFaceObservation]) {
    self.clearDrawings()
    let facesBoundingBoxes: [CAShapeLayer] = observedFaces.map({ (observedFace: VNFaceObservation) -> CAShapeLayer in
        // Convert the normalised Vision bounding box into the preview layer's coordinate space
        let faceBoundingBoxOnScreen = self.previewLayer.layerRectConverted(fromMetadataOutputRect: observedFace.boundingBox)
        let faceBoundingBoxPath = CGPath(rect: faceBoundingBoxOnScreen, transform: nil)
        let faceBoundingBoxShape = CAShapeLayer()
        faceBoundingBoxShape.path = faceBoundingBoxPath
        faceBoundingBoxShape.fillColor = UIColor.clear.cgColor
        faceBoundingBoxShape.strokeColor = UIColor.green.cgColor
        return faceBoundingBoxShape
    })
    facesBoundingBoxes.forEach({ faceBoundingBox in self.view.layer.addSublayer(faceBoundingBox) })
    self.drawings = facesBoundingBoxes
}

private func clearDrawings() {
    self.drawings.forEach({ drawing in drawing.removeFromSuperlayer() })
}
private func detectFace(in image: CVPixelBuffer) {
    let faceDetectionRequest = VNDetectFaceLandmarksRequest(completionHandler: { (request: VNRequest, error: Error?) in
        DispatchQueue.main.async {
            if let results = request.results as? [VNFaceObservation] {
                self.handleFaceDetectionResults(results)
            } else {
                self.clearDrawings()
            }
        }
    })
    let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: image, orientation: .leftMirrored, options: [:])
    try? imageRequestHandler.perform([faceDetectionRequest])
}
Face with bounding box drawn
private func drawFaceFeatures(_ landmarks: VNFaceLandmarks2D, screenBoundingBox: CGRect) -> [CAShapeLayer] {
    var faceFeaturesDrawings: [CAShapeLayer] = []
    if let leftEye = landmarks.leftEye {
        let eyeDrawing = self.drawEye(leftEye, screenBoundingBox: screenBoundingBox)
        faceFeaturesDrawings.append(eyeDrawing)
    }
    if let rightEye = landmarks.rightEye {
        let eyeDrawing = self.drawEye(rightEye, screenBoundingBox: screenBoundingBox)
        faceFeaturesDrawings.append(eyeDrawing)
    }
    // draw other face features here (see the sketch after drawEye below)
    return faceFeaturesDrawings
}
private func drawEye(_ eye: VNFaceLandmarkRegion2D, screenBoundingBox: CGRect) -> CAShapeLayer {
    let eyePath = CGMutablePath()
    let eyePathPoints = eye.normalizedPoints
        .map({ eyePoint in
            CGPoint(
                x: eyePoint.y * screenBoundingBox.height + screenBoundingBox.origin.x,
                y: eyePoint.x * screenBoundingBox.width + screenBoundingBox.origin.y)
        })
    eyePath.addLines(between: eyePathPoints)
    eyePath.closeSubpath()
    let eyeDrawing = CAShapeLayer()
    eyeDrawing.path = eyePath
    eyeDrawing.fillColor = UIColor.clear.cgColor
    eyeDrawing.strokeColor = UIColor.green.cgColor
    return eyeDrawing
}
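The same conversion from normalised landmark points to screen coordinates works for the other regions VNFaceLandmarks2D exposes (outerLips, nose, faceContour, the eyebrows and so on). Here is a hedged sketch of a generic helper that drawFaceFeatures could call for those regions; the name drawLandmarkRegion is an assumption of mine, not part of the original code:

private func drawLandmarkRegion(_ region: VNFaceLandmarkRegion2D, screenBoundingBox: CGRect) -> CAShapeLayer {
    // Map the normalised points into the face's on-screen bounding box,
    // using the same axis mapping as drawEye above
    let points = region.normalizedPoints.map({ point in
        CGPoint(
            x: point.y * screenBoundingBox.height + screenBoundingBox.origin.x,
            y: point.x * screenBoundingBox.width + screenBoundingBox.origin.y)
    })
    let path = CGMutablePath()
    path.addLines(between: points)
    path.closeSubpath()
    let drawing = CAShapeLayer()
    drawing.path = path
    drawing.fillColor = UIColor.clear.cgColor
    drawing.strokeColor = UIColor.green.cgColor
    return drawing
}

With this in place, drawFaceFeatures would simply unwrap each optional region and append, for example, self.drawLandmarkRegion(outerLips, screenBoundingBox: screenBoundingBox) to the drawings array.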
private func handleFaceDetectionResults(_ observedFaces: [VNFaceObservation]) {
    self.clearDrawings()
    let facesBoundingBoxes: [CAShapeLayer] = observedFaces.flatMap({ (observedFace: VNFaceObservation) -> [CAShapeLayer] in
        let faceBoundingBoxOnScreen = self.previewLayer.layerRectConverted(fromMetadataOutputRect: observedFace.boundingBox)
        let faceBoundingBoxPath = CGPath(rect: faceBoundingBoxOnScreen, transform: nil)
        let faceBoundingBoxShape = CAShapeLayer()
        faceBoundingBoxShape.path = faceBoundingBoxPath
        faceBoundingBoxShape.fillColor = UIColor.clear.cgColor
        faceBoundingBoxShape.strokeColor = UIColor.green.cgColor
        var newDrawings = [CAShapeLayer]()
        newDrawings.append(faceBoundingBoxShape)
        if let landmarks = observedFace.landmarks {
            newDrawings = newDrawings + self.drawFaceFeatures(landmarks, screenBoundingBox: faceBoundingBoxOnScreen)
        }
        return newDrawings
    })
    facesBoundingBoxes.forEach({ faceBoundingBox in self.view.layer.addSublayer(faceBoundingBox) })
    self.drawings = facesBoundingBoxes
}
Face with bounding box and eye contours drawn on screen

Summary

In this tutorial we have learned how to:

  • Stream the camera feed from an iOS device to the screen
  • Handle live images from the camera in our app
  • Use the Vision framework to process the images and detect faces and face features
  • Convert image coordinates to screen coordinates
  • Draw onto the screen using CAShapeLayer

Final notes
