Python Mouse Control Remotely With Your Hand

Gökhan ERGEN
3 min readJul 30, 2023

--

Normally, we control the computer with a physical mouse, but we can also move the cursor remotely with our hand.

In Python, this is possible by using libraries such as mediapipe, pyautogui and opencv. Here, mediapipe lets us track the hand landmarks that we use for the actions: left click, right click and double click.

pyautogui is used for the mouse events, namely moving the cursor and clicking. The project can still be optimized further.

from concurrent.futures import ThreadPoolExecutor
from math import sqrt
import cv2
import mediapipe as mp
import psutil
from entities.mouse import Mouse
from win32api import GetSystemMetrics

# Border (in pixels) trimmed from every frame edge before processing.
grabSize = 50

# Worker pool that off-loads cursor math (draw) from the capture loop;
# sized to the number of logical CPU cores reported by psutil.
pool = ThreadPoolExecutor(max_workers=psutil.cpu_count())
mp_hands = mp.solutions.hands

# Track a single hand on a live video stream; 0.5 confidences trade a
# little stability for lower latency.
hands = mp_hands.Hands(static_image_mode=False,
                       max_num_hands=1,
                       min_detection_confidence=0.5,
                       min_tracking_confidence=0.5
                       )

mp_draw = mp.solutions.drawing_utils
# Mouse wrapper bound to the primary display size
# (win32 metric 0 = screen width, metric 1 = screen height).
mouse = Mouse(GetSystemMetrics(0), GetSystemMetrics(1))


def resolution(cap, w, h):
    """Request a capture resolution of w x h on *cap*.

    Property ids 3 and 4 are OpenCV's CAP_PROP_FRAME_WIDTH and
    CAP_PROP_FRAME_HEIGHT.
    """
    for prop_id, size in ((3, w), (4, h)):
        cap.set(prop_id, size)


def isExist(value):
    """Return True when *value* is truthy (e.g. a non-empty landmark list)."""
    return True if value else False


def getDistance(a, b, hands_landmarks, scale=False, w=0, h=0):
    """Euclidean distance between landmarks *a* and *b* of the first hand.

    Landmark coordinates are normalized to [0, 1].  With scale=True the
    deltas are first multiplied by the frame width *w* and height *h*,
    yielding a distance in pixels.
    """
    first_hand = hands_landmarks[0].landmark
    dx = first_hand[a].x - first_hand[b].x
    dy = first_hand[a].y - first_hand[b].y
    if scale:
        dx = dx * w
        dy = dy * h
    return sqrt(dx ** 2 + dy ** 2)


def draw(frame, hands_landmarks):
    """Translate the hand position in *frame* into a cursor move and, when a
    finger pinches the thumb, into a click.

    Runs on a worker thread (submitted from start()) and relies on the
    module-level `mouse` instance for the actual pointer operations.
    """
    h, w, c = frame.shape

    # Hand size in pixels (palm width: landmarks 3-17, palm height: 0-10);
    # used below to shrink the mapped area so the hand can reach the edges.
    distance_three_to_seventeen = round(getDistance(3, 17, hands_landmarks, True, w, h))
    distance_zero_to_ten = round(getDistance(0, 10, hands_landmarks, True, w, h))
    # Normalized landmark distances that drive the gestures:
    distance_left = getDistance(4, 8, hands_landmarks)            # thumb tip to index tip
    distance_double_click = getDistance(4, 12, hands_landmarks)   # thumb tip to middle tip
    distance_point = getDistance(9, 0, hands_landmarks)           # palm length, scale reference
    distance_right = getDistance(16, 4, hands_landmarks)          # ring tip to thumb tip
    distance_width = getDistance(4, 20, hands_landmarks)          # hand span, thumb to pinky
    distance_height = getDistance(0, 12, hands_landmarks)         # wrist to middle-finger tip

    # Crop the frame by the hand size on every side; cursor mapping is done
    # relative to this smaller area.
    wrapped_frame = frame[int(distance_zero_to_ten):int(h - distance_zero_to_ten),
                          int(distance_three_to_seventeen):int(w - distance_three_to_seventeen)]

    wh, ww, wc = wrapped_frame.shape
    # Hand centre = midpoint of wrist (0) and middle-finger base (9),
    # shifted into the cropped frame's coordinate system.
    wx = w * (hands_landmarks[0].landmark[0].x + hands_landmarks[0].landmark[9].x) / 2 - distance_three_to_seventeen
    wy = h * (hands_landmarks[0].landmark[0].y + hands_landmarks[0].landmark[9].y) / 2 - distance_zero_to_ten

    # Clamp just inside the crop; Mouse.move() rejects coordinates <= 0.
    if (wx < 0):
        wx = 1
    if (wy < 0):
        wy = 1

    try:
        # Scale crop coordinates up to full-screen coordinates.
        x = round((wx) * (mouse._swidth / (ww)))
        y = round((wy) * (mouse._sheight / (wh)))
    except ZeroDivisionError:
        # The hand filled the frame and the crop collapsed; skip this frame.
        return

    # NOTE(review): the `_swidth - x` term appears to mirror horizontally,
    # presumably compensating for the camera's flipped view — confirm.  The
    # extra distance_width/height nudges apply only on the far halves of the
    # screen so the cursor can reach those edges.
    mouse.move(mouse._swidth - x + (0 if (mouse._swidth - x) < mouse._swidth / 2 else distance_width), y +
               (0 if (mouse._sheight - y) < mouse._sheight / 2 else distance_height))

    # A pinch triggers when the finger-to-thumb gap is below one fifth of the
    # palm length (the *100 factors on both sides cancel out).
    if (distance_left * 100 < distance_point * 100 / 5):
        mouse.click()
    elif (distance_right * 100 < distance_point * 100 / 5):
        mouse.click(button="right")
    elif (distance_double_click * 100 < distance_point * 100 / 5):
        mouse.click(button="double")


def start(cam_code=0):
    """Run the capture loop until 'q' is pressed.

    Reads frames from camera *cam_code*, detects a single hand with
    mediapipe, shows a preview of the hand region, and submits the cursor
    mapping (draw) to the thread pool so the loop stays responsive.

    Parameters
    ----------
    cam_code : int
        OpenCV camera index passed to cv2.VideoCapture.
    """
    cap = cv2.VideoCapture(cam_code)

    while True:
        ret, frame = cap.read()
        if not ret:
            print("failed to grab frame")
            break
        # Light blur to reduce sensor noise before detection.
        frame = cv2.GaussianBlur(frame, (3, 3), 0)
        # Trim a fixed border so the hand can steer to the screen edges.
        grappedImage = frame[grabSize:frame.shape[0] - grabSize,
                             grabSize:frame.shape[1] - grabSize]

        # mediapipe expects RGB input; OpenCV delivers BGR.
        rgb_img = cv2.cvtColor(grappedImage, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_img)
        hands_landmarks = results.multi_hand_landmarks

        if isExist(hands_landmarks):
            gh, gw = grappedImage.shape[0], grappedImage.shape[1]
            landmark = hands_landmarks[0].landmark
            # Hand bounding box: rows from middle-finger tip (12) down to the
            # wrist (0), columns from pinky tip (20) to thumb tip (4).
            # (The original end expression int(a) + int(b) - int(a) reduces
            # exactly to int(b).)
            top = int(gh * landmark[12].y)
            bottom = int(gh * landmark[0].y)
            left = int(gw * landmark[20].x)
            right = int(gw * landmark[4].x)
            try:
                cv2.imshow("hand", grappedImage[top:bottom, left:right])
            except Exception:
                # Landmarks can land outside the cropped frame, producing an
                # empty slice imshow rejects; skip the preview for this frame.
                pass
            # Off-load the cursor math so frame grabbing is not blocked.
            pool.submit(draw, grappedImage, hands_landmarks)

        cv2.imshow("Hand tracer", grappedImage)
        if cv2.waitKey(1) == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

cap.py is used for grabbing frames. Each frame contains the pixel color values in RGB format, and hands_landmarks contains the detected hand and its points. A thread pool is used here to shorten the render time, but this may not be the best solution, so we can improve it.

pool = ThreadPoolExecutor(max_workers=psutil.cpu_count())

The number of worker threads is set according to the number of CPU cores reported by psutil.cpu_count().

Euclidean distance is very important here because I used it to calculate the distance between hand marks. This is for mouse actions.

if (distance_left * 100 < distance_point * 100 / 5):
mouse.click()
elif (distance_right * 100 < distance_point * 100 / 5):
mouse.click(button="right")
elif (distance_double_click * 100 < distance_point * 100 / 5):
mouse.click(button="double")

I specified a technique to make a decision. On the screen, the position of mouse is specified by scaling from hand point calculated to our computer screen position.


import pyautogui,time,threading


class Mouse():
    """Thin wrapper around pyautogui bound to a fixed screen size.

    Note: the original class defined ``move`` twice with identical bodies;
    the duplicate has been removed.
    """

    def __init__(self, screen_width, screen_height):
        # Screen resolution used to keep the cursor on-screen.
        self._swidth = screen_width
        self._sheight = screen_height

    def move(self, x, y):
        """Move the cursor to (x, y) if it lies strictly inside the screen."""
        if 0 < x < self._swidth and 0 < y < self._sheight:
            pyautogui.moveTo((x, y))

    def start_click(self, button):
        """Click at the current cursor position; runs on a worker thread.

        *button* is "left", "right", or anything else for a double click.
        """
        x, y = pyautogui.position()
        if x < self._swidth and y < self._sheight:
            if button == "left":
                pyautogui.click(x, y)
            elif button == "right":
                pyautogui.click(x, y, button="right")
            else:
                pyautogui.doubleClick(x, y)

    def click(self, button="left"):
        """Schedule a click on a background thread.

        The short sleep lets the cursor settle after the triggering gesture
        before the click lands.
        """
        time.sleep(0.15)
        thread = threading.Thread(target=self.start_click, args=(button,), daemon=False)
        thread.start()

The position computed from the frame is applied here. We can also see the other actions, such as the left and right clicks.

def cam_control(code):
    """Show one frame from camera *code* so the user can verify it works.

    Fixes over the original: the read result is checked (imshow on None
    crashes), a waitKey call is added (without it the window is destroyed
    before it ever renders), and cv2 is imported once instead of via
    repeated __import__ calls.
    """
    import cv2  # local import keeps this standalone helper self-contained
    cap = cv2.VideoCapture(code)
    res, frame = cap.read()
    if res:
        cv2.imshow("test", frame)
        # Block until a key is pressed so the frame is actually visible.
        cv2.waitKey(0)
    else:
        print("failed to grab frame from camera", code)
    cap.release()
    cv2.destroyAllWindows()

You can check if your camera is working

from core.cap import start

if __name__ == '__main__':
    # Launch the hand-tracking capture loop on the default camera (index 0).
    start(0)

Finally, we can start the application. It can still be improved, because there are some problems, such as unintentional clicks or difficulty clicking in certain positions.

https://github.com/gokhanergen-tech/Remote-Computer-Control-With-Hand

--

--