Introduction
There is a fairly novel form of live streaming these days: virtual streaming, usually associated with VTubers. Instead of a real person appearing on camera, a virtual avatar is shown, such as an anime character, but the avatar's facial expressions are still driven by a real person. I came across this recently and found it quite interesting. As it happens, in my previous post I introduced a simple "motion capture" program built with Google's MediaPipe library, using its hand gesture recognition module, and mentioned that the module can power many fun projects such as virtual keyboards, virtual painting, and gesture-controlled games. The library ships many other modules as well, including face landmark detection, which is exactly what we need to build a simple "face capture" program and, with it, a small virtual streaming setup of our own.
Complete Code
import cv2
import mediapipe as mp
import numpy as np
# Math and pose-estimation utilities
class FaceGeometry:
    def __init__(self):
        self.INDEX_NOSE = 1
        self.INDEX_CHIN = 152
        self.INDEX_LEFT_EYE_CORNER = 33
        self.INDEX_RIGHT_EYE_CORNER = 263
        self.INDEX_LEFT_MOUTH_CORNER = 61
        self.INDEX_RIGHT_MOUTH_CORNER = 291
        # 3D reference points (relative coordinates of a generic face model)
        self.face_3d = np.array([
            [0.0, 0.0, 0.0],           # nose tip
            [0.0, -330.0, -65.0],      # chin
            [-225.0, 170.0, -135.0],   # left eye corner
            [225.0, 170.0, -135.0],    # right eye corner
            [-150.0, -150.0, -125.0],  # left mouth corner
            [150.0, -150.0, -125.0]    # right mouth corner
        ], dtype=np.float64)
    def get_head_pose(self, landmarks, img_w, img_h):
        face_2d = []
        points_idx = [self.INDEX_NOSE, self.INDEX_CHIN, self.INDEX_LEFT_EYE_CORNER,
                      self.INDEX_RIGHT_EYE_CORNER, self.INDEX_LEFT_MOUTH_CORNER, self.INDEX_RIGHT_MOUTH_CORNER]
        for idx in points_idx:
            lm = landmarks.landmark[idx]
            x, y = int(lm.x * img_w), int(lm.y * img_h)
            face_2d.append([x, y])
        face_2d = np.array(face_2d, dtype=np.float64)
        # Approximate pinhole camera: focal length ~ image width,
        # principal point at the image center (cx = w/2, cy = h/2)
        focal_length = 1 * img_w
        cam_matrix = np.array([
            [focal_length, 0, img_w / 2],
            [0, focal_length, img_h / 2],
            [0, 0, 1]
        ])
        dist_matrix = np.zeros((4, 1), dtype=np.float64)  # assume no lens distortion
        success, rot_vec, trans_vec = cv2.solvePnP(self.face_3d, face_2d, cam_matrix, dist_matrix)
        if not success:
            return 0, 0, 0
        # Convert the rotation vector to a matrix, then decompose into Euler angles (degrees)
        rmat, _ = cv2.Rodrigues(rot_vec)
        angles, _, _, _, _, _ = cv2.RQDecomp3x3(rmat)
        x_angle = angles[0]  # pitch
        y_angle = angles[1]  # yaw
        z_angle = angles[2]  # roll
        # Clamp the angles to a reasonable range
        x_angle = np.clip(x_angle, -50, 50)
        y_angle = np.clip(y_angle, -50, 50)
        z_angle = np.clip(z_angle, -50, 50)
        return x_angle, y_angle, z_angle
    @staticmethod
    def calculate_mar(landmarks):
        # Mouth openness: distance between the inner upper lip (landmark 13)
        # and the inner lower lip (landmark 14), scaled up for readability
        top = np.array([landmarks[13].x, landmarks[13].y])
        bottom = np.array([landmarks[14].x, landmarks[14].y])
        return np.linalg.norm(top - bottom) * 100

# Avatar rendering module
class AvatarRenderer:
    def __init__(self, width=800, height=600):
        self.W = width
        self.H = height
        # All colors are BGR (OpenCV convention)
        self.C_BG = (255, 240, 230)
        self.C_SKIN = (220, 235, 255)
        self.C_HAIR = (80, 60, 50)
        self.C_EYE_WHITE = (250, 250, 250)
        self.C_EYE_IRIS = (200, 100, 50)
        self.C_BLUSH = (180, 180, 255)
        self.pose_buffer = []
    def draw(self, frame, pitch, yaw, roll, ear_left, ear_right, mar):
        # Smooth the head pose with a moving average over the last 5 frames
        self.pose_buffer.append([pitch, yaw, roll])
        if len(self.pose_buffer) > 5:
            self.pose_buffer.pop(0)
        pitch, yaw, roll = np.mean(self.pose_buffer, axis=0)
        canvas = np.zeros((self.H, self.W, 3), dtype=np.uint8)
        canvas[:] = self.C_BG
        cx, cy = self.W // 2, self.H // 2 + 50
        # Map yaw/pitch onto a feature offset so the face appears to look around
        offset_x = int(yaw * 3.0)
        offset_y = int(pitch * 3.0)
        offset_x = np.clip(offset_x, -80, 80)
        offset_y = np.clip(offset_y, -60, 60)
        M = cv2.getRotationMatrix2D((cx, cy), roll, 1.0)
        # 1. Back hair
        cv2.ellipse(canvas, (cx, cy), (160, 170), 0, 0, 360, self.C_HAIR, -1, cv2.LINE_AA)
        # 2. Face outline
        cv2.ellipse(canvas, (cx, cy), (140, 160), 0, 0, 360, self.C_SKIN, -1, cv2.LINE_AA)
        face_cx = cx - offset_x
        face_cy = cy + offset_y
        # 3. Eyes
        eye_spacing = 60
        self._draw_eye(canvas, face_cx - eye_spacing, face_cy - 10, 35, 45, ear_left)
        self._draw_eye(canvas, face_cx + eye_spacing, face_cy - 10, 35, 45, ear_right)
        # 4. Blush
        cv2.ellipse(canvas, (face_cx - 70, face_cy + 40), (20, 15), 0, 0, 360, self.C_BLUSH, -1, cv2.LINE_AA)
        cv2.ellipse(canvas, (face_cx + 70, face_cy + 40), (20, 15), 0, 0, 360, self.C_BLUSH, -1, cv2.LINE_AA)
        # 5. Mouth
        self._draw_mouth(canvas, face_cx, face_cy + 60, mar)
        # 6. Nose
        cv2.circle(canvas, (face_cx, face_cy + 20), 3, (160, 180, 210), -1, cv2.LINE_AA)
        # 7. Front hair (moves less than the face, giving a slight parallax effect)
        hair_cx = cx - int(offset_x * 0.6)
        hair_cy = cy + int(offset_y * 0.6) - 140
        pts = np.array([[hair_cx-140, hair_cy+80], [hair_cx, hair_cy], [hair_cx+140, hair_cy+80],
                        [hair_cx+150, hair_cy-60], [hair_cx-150, hair_cy-60]], np.int32)
        cv2.fillPoly(canvas, [pts], self.C_HAIR, cv2.LINE_AA)
        # Apply the roll rotation to the finished canvas
        canvas = cv2.warpAffine(canvas, M, (self.W, self.H), borderValue=self.C_BG)
        return canvas
    def _draw_eye(self, canvas, x, y, w, h, ear):
        if ear < 0.15:
            # Closed eye: a curved lid line with a small lash
            cv2.ellipse(canvas, (x, y), (w, h//4), 0, 180, 360, (50, 40, 40), 3, cv2.LINE_AA)
            cv2.line(canvas, (x-w, y), (x-w-5, y+5), (50, 40, 40), 2, cv2.LINE_AA)
        else:
            # Open eye: white, outline, iris, highlight
            cv2.ellipse(canvas, (x, y), (w, h), 0, 0, 360, self.C_EYE_WHITE, -1, cv2.LINE_AA)
            cv2.ellipse(canvas, (x, y), (w, h), 0, 0, 360, (0, 0, 0), 2, cv2.LINE_AA)
            cv2.circle(canvas, (x, y+5), int(w*0.5), self.C_EYE_IRIS, -1, cv2.LINE_AA)
            cv2.circle(canvas, (x - 10, y - 5), 8, (255, 255, 255), -1, cv2.LINE_AA)
        # Eyebrow, drawn in both states
        cv2.ellipse(canvas, (x, y - h - 15), (w, 5), 0, 180, 360, self.C_HAIR, 3, cv2.LINE_AA)
    def _draw_mouth(self, canvas, x, y, mar):
        if mar > 3.0:
            # Open mouth: an ellipse whose height follows the mouth openness
            h = min(int(mar * 5), 50)
            cv2.ellipse(canvas, (x, y), (30, h), 0, 0, 360, (100, 100, 200), -1, cv2.LINE_AA)
            cv2.ellipse(canvas, (x, y), (30, h), 0, 0, 360, (80, 80, 150), 2, cv2.LINE_AA)
        else:
            # Closed mouth: a simple smile arc
            cv2.ellipse(canvas, (x, y - 5), (20, 10), 0, 0, 180, (100, 80, 80), 3, cv2.LINE_AA)

def main():
    cap = cv2.VideoCapture(0)
    mp_face_mesh = mp.solutions.face_mesh
    mp_drawing = mp.solutions.drawing_utils
    mp_drawing_styles = mp.solutions.drawing_styles
    face_mesh = mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    )
    renderer = AvatarRenderer()
    geometry = FaceGeometry()
    # True: show the camera feed, False: show the mesh on a black background
    show_real_face = True
    print("Starting...")
    print("Press 'e' to toggle privacy mode / real-face mode")
    print("Press 'q' to quit")
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            continue
        image = cv2.flip(image, 1)  # mirror so the preview behaves like a mirror
        h, w, _ = image.shape
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Always run detection on the real image so the capture keeps working
        results = face_mesh.process(rgb_image)
        # Prepare the preview image
        if show_real_face:
            # Mode A: copy the camera frame
            preview_img = image.copy()
        else:
            # Mode B: a black canvas with the same size as the camera frame
            preview_img = np.zeros((h, w, 3), dtype=np.uint8)
        # Defaults used when no face is detected
        pitch, yaw, roll = 0, 0, 0
        ear_left, ear_right = 0.3, 0.3
        mar = 0.0
        if results.multi_face_landmarks:
            for face_landmarks in results.multi_face_landmarks:
                # Draw the mesh onto preview_img
                mp_drawing.draw_landmarks(
                    image=preview_img,
                    landmark_list=face_landmarks,
                    connections=mp_face_mesh.FACEMESH_TESSELATION,
                    landmark_drawing_spec=None,
                    connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style())
                mp_drawing.draw_landmarks(
                    image=preview_img,
                    landmark_list=face_landmarks,
                    connections=mp_face_mesh.FACEMESH_CONTOURS,
                    landmark_drawing_spec=None,
                    connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_contours_style())
                # Head pose
                pitch, yaw, roll = geometry.get_head_pose(face_landmarks, w, h)
                lm = face_landmarks.landmark
                # Eye aspect ratio (EAR): vertical eyelid gap / horizontal eye width;
                # it drops toward 0 as the eye closes
                left_v = np.linalg.norm(np.array([lm[159].x, lm[159].y]) - np.array([lm[145].x, lm[145].y]))
                left_h = np.linalg.norm(np.array([lm[33].x, lm[33].y]) - np.array([lm[133].x, lm[133].y]))
                ear_left = left_v / (left_h + 1e-6)
                right_v = np.linalg.norm(np.array([lm[386].x, lm[386].y]) - np.array([lm[374].x, lm[374].y]))
                right_h = np.linalg.norm(np.array([lm[362].x, lm[362].y]) - np.array([lm[263].x, lm[263].y]))
                ear_right = right_v / (right_h + 1e-6)
                mar = geometry.calculate_mar(lm)
        # Draw the avatar
        avatar_img = renderer.draw(image, pitch, yaw, roll, ear_left, ear_right, mar)
        # Composite: put preview_img into the top-left corner
        cam_h, cam_w = 225, 300
        cam_preview = cv2.resize(preview_img, (cam_w, cam_h))
        avatar_img[0:cam_h, 0:cam_w] = cam_preview
        # Status text
        mode_text = "REAL" if show_real_face else "PRIVACY (Mesh Only)"
        color_text = (0, 255, 0) if show_real_face else (0, 0, 255)  # green / red
        cv2.putText(avatar_img, f"Mode: {mode_text} (Press 'e')", (400, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color_text, 2)
        cv2.putText(avatar_img, f"Pitch:{int(pitch)} Yaw:{int(yaw)} Roll:{int(roll)}",
                    (400, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (50, 50, 50), 2)
        cv2.imshow('Virtual Avatar VTuber', avatar_img)
        # Key handling
        key = cv2.waitKey(5) & 0xFF
        if key == ord('q'):
            break
        elif key == ord('e'):
            show_real_face = not show_real_face  # toggle mode
    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
The code is quite simple and depends only on the mediapipe, numpy, and opencv libraries. One thing worth noting: the algorithm that estimates the head pose, i.e. the Pitch, Yaw, and Roll angles, from the facial landmarks is called PnP (Perspective-n-Point). Given the correspondences between a set of known 3D model points and their 2D projections in the image, it recovers the object's 3D rotation and translation; look it up if you are interested.
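To build some intuition for what cv2.solvePnP gives you, here is a minimal self-contained sketch, separate from the program above and using a fabricated pose rather than real landmarks: it projects the same 6-point face model into an image with a known rotation and translation, asks solvePnP to recover that pose from the resulting 2D points, and reprojects with cv2.projectPoints to check the error.

import cv2
import numpy as np

# The same generic 6-point face model used in FaceGeometry above
face_3d = np.array([
    [0.0, 0.0, 0.0], [0.0, -330.0, -65.0],
    [-225.0, 170.0, -135.0], [225.0, 170.0, -135.0],
    [-150.0, -150.0, -125.0], [150.0, -150.0, -125.0]
], dtype=np.float64)
w, h = 640, 480
cam_matrix = np.array([[w, 0, w / 2], [0, w, h / 2], [0, 0, 1]], dtype=np.float64)
dist = np.zeros((4, 1))
# Fabricate 2D observations by projecting the model with a known pose
true_rvec = np.array([[0.1], [0.3], [0.05]])    # axis-angle rotation (radians)
true_tvec = np.array([[0.0], [0.0], [1000.0]])  # translation (model units)
face_2d, _ = cv2.projectPoints(face_3d, true_rvec, true_tvec, cam_matrix, dist)
face_2d = face_2d.reshape(-1, 2)
# Recover the pose from the 2D-3D correspondences, then verify by reprojection
ok, rvec, tvec = cv2.solvePnP(face_3d, face_2d, cam_matrix, dist)
reproj, _ = cv2.projectPoints(face_3d, rvec, tvec, cam_matrix, dist)
err = np.linalg.norm(reproj.reshape(-1, 2) - face_2d, axis=1).mean()
print(f"mean reprojection error: {err:.4f} px")  # should be close to 0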
Results

The program has two modes, a privacy mode and a real-face mode. The screenshot above shows privacy mode, in which the preview only shows the face mesh drawn on a black background. Pressing the 'e' key switches to real-face mode, where the mesh is drawn over the real camera image instead.
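To go one step further and actually stream with the avatar, the rendered frames need to reach your streaming software. One option, not part of the code above, is to push them into a virtual camera that OBS or a browser can pick up. Below is a minimal sketch assuming the third-party pyvirtualcam package (pip install pyvirtualcam, plus a virtual camera backend such as OBS Virtual Camera); in the real program, avatar_img would come from renderer.draw(...) inside the main loop.

import cv2
import numpy as np
import pyvirtualcam

W, H, FPS = 800, 600, 30
with pyvirtualcam.Camera(width=W, height=H, fps=FPS) as cam:
    print(f"Virtual camera started: {cam.device}")
    while True:
        # Placeholder frame standing in for AvatarRenderer.draw(...) output
        avatar_img = np.full((H, W, 3), 230, dtype=np.uint8)
        # pyvirtualcam expects RGB frames by default; OpenCV images are BGR
        cam.send(cv2.cvtColor(avatar_img, cv2.COLOR_BGR2RGB))
        cam.sleep_until_next_frame()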