SC learning process: opinions, advice

Try this Python script; it is a very clean demo of the library (MediaPipe hand tracking with OpenCV):

Install the dependencies first (this is a shell command, not part of the script):
pip install mediapipe opencv-python numpy
import cv2
import mediapipe as mp
import numpy as np

class HandTracker:
    """Detect hands in BGR video frames with MediaPipe Hands and annotate them.

    Call :meth:`find_hands` once per frame to run detection, then
    :meth:`find_finger_positions` to read fingertip pixel coordinates from
    that same detection result.
    """

    # (name, MediaPipe hand-landmark index) for each fingertip, thumb to pinky.
    _FINGER_TIPS = (
        ('THUMB', 4),
        ('INDEX', 8),
        ('MIDDLE', 12),
        ('RING', 16),
        ('PINKY', 20),
    )

    # Result of the most recent find_hands() call. Defaults to None so that
    # find_finger_positions() is safe to call before any frame was processed.
    results = None

    def __init__(self):
        """Create the MediaPipe Hands solution (video mode, up to two hands)."""
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=2,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.5,
        )
        self.mp_draw = mp.solutions.drawing_utils
        self.results = None

    def __enter__(self):
        """Return the tracker itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release the MediaPipe resources when leaving a ``with`` block."""
        self.close()

    def close(self):
        """Release the resources held by the underlying MediaPipe solution."""
        self.hands.close()

    def find_hands(self, img, draw=True):
        """Run hand detection on a BGR frame and optionally draw the skeleton.

        The detection result is stored in ``self.results`` for later use by
        :meth:`find_finger_positions`.

        Args:
            img: Frame in OpenCV's BGR channel order.
            draw: When true, draw landmarks and connections onto ``img``.

        Returns:
            The same ``img`` object that was passed in.
        """
        # The frame is converted because process() is fed RGB, not OpenCV's BGR.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(img_rgb)

        detected = self.results.multi_hand_landmarks
        if draw and detected:
            for hand_landmarks in detected:
                self.mp_draw.draw_landmarks(
                    img,
                    hand_landmarks,
                    self.mp_hands.HAND_CONNECTIONS,
                )
        return img

    def find_finger_positions(self, img, hand_no=0, draw=True):
        """Return fingertip pixel positions for one hand of the last detection.

        Args:
            img: Frame the detection was run on; its ``shape`` scales the
                normalized landmark coordinates to pixels.
            hand_no: Index into the detected hands.
            draw: When true, mark each fingertip on ``img`` with a filled
                circle.

        Returns:
            A list of ``{'finger': name, 'position': (x, y)}`` dicts in
            thumb-to-pinky order. The list is empty when no frame has been
            processed yet, no hand was detected, or ``hand_no`` is beyond
            the number of detected hands.
        """
        results = self.results
        detected = results.multi_hand_landmarks if results is not None else None
        if not detected or len(detected) <= hand_no:
            return []

        height, width = img.shape[0], img.shape[1]
        landmarks = detected[hand_no].landmark

        landmark_list = []
        for name, tip_id in self._FINGER_TIPS:
            tip = landmarks[tip_id]
            # Landmark x/y are fractions of the frame size; scale to pixels.
            cx = int(tip.x * width)
            cy = int(tip.y * height)
            landmark_list.append({'finger': name, 'position': (cx, cy)})
            if draw:
                cv2.circle(img, (cx, cy), 10, (255, 0, 255), cv2.FILLED)

        return landmark_list

def main():
    """Run the live webcam demo until a frame fails to read or 'q' is pressed.

    Each frame from camera 0 is passed through the hand tracker, the
    fingertip positions of the first detected hand are printed onto the
    frame, and the result is shown in a window titled "Hand Tracking".
    """
    import sys

    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            # Without this check the loop would just exit silently.
            print("Could not open the default camera (index 0).", file=sys.stderr)
            return

        detector = HandTracker()

        while True:
            success, img = cap.read()
            if not success:
                break

            img = detector.find_hands(img)
            finger_positions = detector.find_finger_positions(img)

            # One text row per fingertip, 30 px apart, starting at y=30.
            for row, finger in enumerate(finger_positions, start=1):
                text = f"{finger['finger']}: {finger['position']}"
                cv2.putText(img, text, (10, 30 * row),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                            (255, 255, 255), 2)

            cv2.imshow("Hand Tracking", img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close windows, even if the loop raised.
        cap.release()
        cv2.destroyAllWindows()

# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()