# Try this Python script: a clean, minimal demo of the MediaPipe hand-tracking library.
# Install the dependencies first (run in a shell): pip install mediapipe opencv-python numpy
import cv2
import mediapipe as mp
import numpy as np
class HandTracker:
    """Small wrapper around MediaPipe Hands for per-frame hand tracking.

    Typical use is to call :meth:`find_hands` on every BGR frame; it stores
    the raw MediaPipe result on ``self.results``.  :meth:`find_finger_positions`
    then reads that stored result to report fingertip pixel coordinates.
    """

    # (finger name, landmark index of its tip), ordered thumb to pinky.
    FINGER_TIPS = (
        ("THUMB", 4),
        ("INDEX", 8),
        ("MIDDLE", 12),
        ("RING", 16),
        ("PINKY", 20),
    )

    # Class-level default so find_finger_positions() returns [] instead of
    # raising AttributeError when it is called before the first find_hands().
    results = None

    def __init__(
        self,
        static_image_mode=False,
        max_num_hands=2,
        min_detection_confidence=0.7,
        min_tracking_confidence=0.5,
    ):
        """Create the underlying MediaPipe Hands solution.

        All arguments are forwarded unchanged to ``mp.solutions.hands.Hands``;
        the defaults are the values this demo has always used.
        """
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=static_image_mode,
            max_num_hands=max_num_hands,
            min_detection_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence,
        )
        self.mp_draw = mp.solutions.drawing_utils
        self.results = None

    def find_hands(self, img: "np.ndarray", draw: bool = True) -> "np.ndarray":
        """Run hand detection on a BGR frame and optionally draw the skeleton.

        Args:
            img: Frame in OpenCV's BGR channel order.  It is converted to RGB
                before being passed to MediaPipe.
            draw: When true, landmarks and their connections are drawn onto
                ``img`` in place for every detected hand.

        Returns:
            The same ``img`` object (annotated if ``draw`` is true).
        """
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(img_rgb)

        detected = self.results.multi_hand_landmarks
        # Checking ``draw`` once up front avoids walking the hands for nothing.
        if draw and detected:
            for hand_landmarks in detected:
                self.mp_draw.draw_landmarks(
                    img,
                    hand_landmarks,
                    self.mp_hands.HAND_CONNECTIONS,
                )
        return img

    def find_finger_positions(
        self, img: "np.ndarray", hand_no: int = 0, draw: bool = True
    ) -> list:
        """Return fingertip pixel positions for one hand of the last result.

        Args:
            img: Frame whose ``shape`` (height, width, ...) is used to scale
                the landmark coordinates to pixels.
            hand_no: Index into the detected hands of the stored result.
            draw: When true (the default, matching the original behaviour),
                a filled circle is drawn on ``img`` at each fingertip.

        Returns:
            A list of ``{'finger': name, 'position': (x, y)}`` dicts ordered
            thumb to pinky, or an empty list when no result is stored yet,
            no hand was detected, or ``hand_no`` is past the last hand.
        """
        detected = getattr(self.results, "multi_hand_landmarks", None)
        if not detected or len(detected) <= hand_no:
            return []

        hand = detected[hand_no]
        height, width = img.shape[:2]

        positions = []
        for finger_name, tip_id in self.FINGER_TIPS:
            tip = hand.landmark[tip_id]
            # Landmark x/y are fractions of the frame size; scale to pixels.
            cx, cy = int(tip.x * width), int(tip.y * height)
            positions.append({
                'finger': finger_name,
                'position': (cx, cy),
            })
            if draw:
                cv2.circle(img, (cx, cy), 10, (255, 0, 255), cv2.FILLED)
        return positions
def main():
    """Run the webcam demo until the stream ends or the user presses 'q'.

    Each frame from camera 0 is passed through ``HandTracker``; the fingertip
    positions of the first detected hand are printed as text lines in the
    top-left corner and the annotated frame is shown in a window titled
    "Hand Tracking".
    """
    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            # Say why nothing happens instead of exiting silently.
            print("Error: could not open the default camera (index 0).")
            return

        detector = HandTracker()
        while True:
            success, img = cap.read()
            if not success:
                break

            img = detector.find_hands(img)
            finger_positions = detector.find_finger_positions(img)

            # One text row per finger, 30 px apart, starting at y = 30.
            for row, finger in enumerate(finger_positions, start=1):
                text = f"{finger['finger']}: {finger['position']}"
                cv2.putText(img, text, (10, 30 * row),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                            (255, 255, 255), 2)

            cv2.imshow("Hand Tracking", img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close windows, even if the loop raised
        # or was interrupted with Ctrl-C.
        cap.release()
        cv2.destroyAllWindows()
# Start the demo only when this file is executed as a script; importing it
# as a module must not open the camera.
if __name__ == "__main__":
    main()