手部21個(gè)關(guān)鍵點(diǎn)檢測(cè)+手勢(shì)識(shí)別-[MediaPipe]
#MediaPipe 是一款由 Google Research 開(kāi)發(fā)并開(kāi)源的多媒體機(jī)器學(xué)習(xí)模型應(yīng)用框架,可以直接調(diào)用其API完成目標(biāo)檢測(cè)、人臉檢測(cè)以及關(guān)鍵點(diǎn)檢測(cè)等。本篇文章介紹其手部21個(gè)關(guān)鍵點(diǎn)檢測(cè)(win10,python版)
安裝mediapipe
pip install mediapipe
創(chuàng)建手部檢測(cè)模型
import cv2
import mediapipe as mp
# Drawing helper (renders keypoints/connections) and the Hands solution module.
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
# Configuration for static images: detection runs on every input image.
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=2,
    min_detection_confidence=0.75,
    min_tracking_confidence=0.5)
# Configuration for video streams: detect once, then track between frames.
# NOTE: this rebinds `hands`, replacing the static-image instance above —
# the two calls illustrate the two alternative configurations.
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.75,
    min_tracking_confidence=0.5)
hands是檢測(cè)手部關(guān)鍵點(diǎn)的模型對(duì)象,構(gòu)造時(shí)有4個(gè)輸入?yún)?shù)可以選擇
1、static_image_mode:默認(rèn)為False,如果設(shè)置為False,就是把輸入看作一個(gè)視頻流,在檢測(cè)到手之后對(duì)手加了一個(gè)目標(biāo)跟蹤(目標(biāo)檢測(cè)+跟蹤),無(wú)需調(diào)用另一次檢測(cè),直到失去對(duì)任何手的跟蹤為止。如果設(shè)置為True,則手部檢測(cè)將在每個(gè)輸入圖像上運(yùn)行(目標(biāo)檢測(cè)),非常適合處理一批靜態(tài)的、可能不相關(guān)的圖像。(如果檢測(cè)的是圖片就要設(shè)置成True)
2、max_num_hands:可以檢測(cè)到的手的數(shù)量最大值,默認(rèn)是2
3、min_detection_confidence: 手部檢測(cè)的最小置信度值,大于這個(gè)數(shù)值被認(rèn)為是成功的檢測(cè)。默認(rèn)為0.5
4、min_tracking_confidence:目標(biāo)跟蹤模型的最小置信度值,大于這個(gè)數(shù)值將被視為已成功跟蹤的手部,默認(rèn)為0.5;如果static_image_mode設(shè)置為True,則忽略此參數(shù)。
結(jié)果輸出
# Run the detector on one frame (expects an RGB image).
results = hands.process(frame)
# Per-hand handedness classification: label ("Left"/"Right") plus score.
print(results.multi_handedness)
# Per-hand list of the 21 landmarks (normalized x, y and relative depth z).
print(results.multi_hand_landmarks)
results.multi_handedness: 包括label和score,label是字符串"Left"或"Right",score是置信度
results.multi_hand_landmarks: 手部21個(gè)關(guān)鍵點(diǎn)的位置信息,包括x,y,z 其中x,y是歸一化后的坐標(biāo)。z代表地標(biāo)深度,以手腕處的深度為原點(diǎn),值越小,地標(biāo)就越靠近相機(jī)(我暫時(shí)也不清楚啥意思)
視頻檢測(cè)代碼
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
# Webcam demo: detect up to two hands per frame and draw the 21 keypoints.
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.75,
    min_tracking_confidence=0.75)
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        # Frame grab failed (camera unplugged/busy): exit instead of
        # crashing inside cvtColor on a None frame.
        break
    # MediaPipe expects RGB input; OpenCV captures BGR.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # The webcam image is mirrored, so flip it horizontally;
    # skip the flip if your camera is not mirrored.
    frame = cv2.flip(frame, 1)
    results = hands.process(frame)
    # Convert back to BGR for OpenCV drawing/display.
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    if results.multi_handedness:
        for hand_label in results.multi_handedness:
            print(hand_label)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Fixed: original was missing the comma between the two arguments.
            print('hand_landmarks:', hand_landmarks)
            # Visualize the 21 keypoints and their connections.
            mp_drawing.draw_landmarks(
                frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
    cv2.imshow('MediaPipe Hands', frame)
    if cv2.waitKey(1) & 0xFF == 27:  # ESC quits
        break
cap.release()
關(guān)鍵點(diǎn)識(shí)別結(jié)果

手勢(shì)識(shí)別
通過(guò)對(duì)檢測(cè)到的手部關(guān)鍵點(diǎn)之間的角度計(jì)算便可以實(shí)現(xiàn)簡(jiǎn)單的手勢(shì)識(shí)別(有局限性),比如計(jì)算大拇指向量0-2和3-4之間的角度,它們之間的角度大于某一個(gè)角度閾值(經(jīng)驗(yàn)值)定義為彎曲,小于某一個(gè)閾值(經(jīng)驗(yàn)值)為伸直。
加入手勢(shì)判別完整代碼
import cv2
import mediapipe as mp
import math
def vector_2d_angle(v1, v2):
    """Return the angle in degrees between two 2-D vectors.

    Args:
        v1: (x, y) components of the first vector.
        v2: (x, y) components of the second vector.

    Returns:
        The angle in degrees in [0, 180], or the sentinel 65535. when the
        angle cannot be computed (zero-length vector, acos domain error)
        or falls outside the valid range.
    """
    v1_x, v1_y = v1[0], v1[1]
    v2_x, v2_y = v2[0], v2[1]
    try:
        cos_theta = (v1_x * v2_x + v1_y * v2_y) / (
            ((v1_x ** 2 + v1_y ** 2) ** 0.5) * ((v2_x ** 2 + v2_y ** 2) ** 0.5))
        angle_ = math.degrees(math.acos(cos_theta))
    except (ZeroDivisionError, ValueError):
        # Zero-length vector, or floating-point rounding pushed |cos| > 1.
        angle_ = 65535.
    # Fixed: original garbled comparison "if angle_ , 180." — ">" intended.
    if angle_ > 180.:
        angle_ = 65535.
    return angle_
def hand_angle(hand_):
    """Compute the bend angle of each of the five fingers.

    Args:
        hand_: sequence of 21 (x, y) pixel coordinates of the hand keypoints.

    Returns:
        List of five angles (degrees) in order: thumb, index, middle,
        ring, pinky. Each angle is measured between the palm-to-knuckle
        vector and the fingertip segment of that finger.
    """
    # Per finger: (a, b) span the proximal vector, (c, d) the distal one.
    joint_ids = (
        (0, 2, 3, 4),     # thumb
        (0, 6, 7, 8),     # index
        (0, 10, 11, 12),  # middle
        (0, 14, 15, 16),  # ring
        (0, 18, 19, 20),  # pinky
    )
    angle_list = []
    for a, b, c, d in joint_ids:
        proximal = (int(hand_[a][0]) - int(hand_[b][0]),
                    int(hand_[a][1]) - int(hand_[b][1]))
        distal = (int(hand_[c][0]) - int(hand_[d][0]),
                  int(hand_[c][1]) - int(hand_[d][1]))
        angle_list.append(vector_2d_angle(proximal, distal))
    return angle_list
def h_gesture(angle_list):
    """Classify a hand gesture from the five finger bend angles.

    Args:
        angle_list: [thumb, index, middle, ring, pinky] angles in degrees,
            as produced by hand_angle(); 65535. marks an invalid angle.

    Returns:
        One of "fist", "five", "gun", "love", "one", "six", "three",
        "thumbUp", "two", or None when no gesture matches.
    """
    thr_angle = 65.        # above this a finger counts as fully bent
    thr_angle_thumb = 53.  # bend threshold specific to the thumb
    thr_angle_s = 49.      # below this a finger counts as straight
    gesture_str = None
    if 65535. not in angle_list:
        # Fixed: the original text had "(x,thr)" tuples (always truthy)
        # where "x > thr" comparisons were intended.
        if (angle_list[0] > thr_angle_thumb) and (angle_list[1] > thr_angle) and (angle_list[2] > thr_angle) and (angle_list[3] > thr_angle) and (angle_list[4] > thr_angle):
            gesture_str = "fist"
        elif (angle_list[0] < thr_angle_s) and (angle_list[1] < thr_angle_s) and (angle_list[2] < thr_angle_s) and (angle_list[3] < thr_angle_s) and (angle_list[4] < thr_angle_s):
            gesture_str = "five"
        elif (angle_list[0] < thr_angle_s) and (angle_list[1] < thr_angle_s) and (angle_list[2] > thr_angle) and (angle_list[3] > thr_angle) and (angle_list[4] > thr_angle):
            gesture_str = "gun"
        elif (angle_list[0] < thr_angle_s) and (angle_list[1] < thr_angle_s) and (angle_list[2] > thr_angle) and (angle_list[3] > thr_angle) and (angle_list[4] < thr_angle_s):
            gesture_str = "love"
        elif (angle_list[0] > 5) and (angle_list[1] < thr_angle_s) and (angle_list[2] > thr_angle) and (angle_list[3] > thr_angle) and (angle_list[4] > thr_angle):
            gesture_str = "one"
        elif (angle_list[0] < thr_angle_s) and (angle_list[1] > thr_angle) and (angle_list[2] > thr_angle) and (angle_list[3] > thr_angle) and (angle_list[4] < thr_angle_s):
            gesture_str = "six"
        elif (angle_list[0] > thr_angle_thumb) and (angle_list[1] < thr_angle_s) and (angle_list[2] < thr_angle_s) and (angle_list[3] < thr_angle_s) and (angle_list[4] > thr_angle):
            gesture_str = "three"
        elif (angle_list[0] < thr_angle_s) and (angle_list[1] > thr_angle) and (angle_list[2] > thr_angle) and (angle_list[3] > thr_angle) and (angle_list[4] > thr_angle):
            gesture_str = "thumbUp"
        elif (angle_list[0] > thr_angle_thumb) and (angle_list[1] < thr_angle_s) and (angle_list[2] < thr_angle_s) and (angle_list[3] > thr_angle) and (angle_list[4] > thr_angle):
            gesture_str = "two"
    return gesture_str
def detect():
    """Run webcam hand tracking and overlay the recognized gesture name.

    Opens camera 0, detects at most one hand per frame, draws the 21
    keypoints, and prints the gesture label onto the frame. Press ESC
    to exit. The camera is always released on the way out.
    """
    mp_drawing = mp.solutions.drawing_utils
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=1,
        min_detection_confidence=0.75,
        min_tracking_confidence=0.75)
    cap = cv2.VideoCapture(0)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Frame grab failed: stop instead of crashing in cvtColor.
                break
            # MediaPipe expects RGB input; OpenCV captures BGR.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Mirror so on-screen motion matches the user's movement.
            frame = cv2.flip(frame, 1)
            results = hands.process(frame)
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                    # Convert normalized landmark coords to pixel coords.
                    hand_local = []
                    for i in range(21):
                        x = hand_landmarks.landmark[i].x * frame.shape[1]
                        y = hand_landmarks.landmark[i].y * frame.shape[0]
                        hand_local.append((x, y))
                    if hand_local:
                        angle_list = hand_angle(hand_local)
                        gesture_str = h_gesture(angle_list)
                        # h_gesture may return None (no gesture matched);
                        # cv2.putText requires a string, so guard it.
                        if gesture_str:
                            cv2.putText(frame, gesture_str, (0, 100),
                                        0, 1.3, (0, 0, 255), 3)
            cv2.imshow('MediaPipe Hands', frame)
            if cv2.waitKey(1) & 0xFF == 27:  # ESC quits
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()
# Run the webcam demo only when executed as a script, not on import.
if __name__ == '__main__':
    detect()
