实现一个简单的“动捕”

引言

现在国内的动画很多都走向3D了,而3D技术一个非常方便的地方就是可以用真人进行动作捕获,然后导入到建模系统中,形成骨骼动画,不仅效率提升很多,效果也非常逼真。不过这其实和这篇博客关系不大,虽然我会实现一个简单的“动捕”效果,但是和传统意义上的动捕不一样,传统做法是全身绑满传感设备,然后捕获动作,这里只是用视觉算法来捕获。这篇博客的主要目的是向大家介绍一个视觉库,那就是google的mediapipe库,这个库有很多模块,涉及视觉领域的多个任务,其中一个就是手势识别,也是本文中会用到的模块。

关于mediapipe,它是一个面向移动设备,也就是手机端的库,优化也基本都是针对移动端,因此我们在电脑端来用它只能说作为一个小玩具来使用,但是玩具虽小,功能还是挺多的,而且胜在使用起来十分简单,我们可以轻松利用这个库做一些很有趣的非产品级项目。

本文将会实现一个手势识别的回放程序,可以录制捕获我们现实的手部动作,导出到本地数据,然后可以用虚拟的手掌进行回放。

完整代码

# hand_capture_replay.py
import json
import os
import sys
from datetime import datetime

import cv2
import mediapipe as mp
import numpy as np
from PyQt5.QtCore import QTimer, Qt, QPointF, QRectF
from PyQt5.QtGui import (
    QPainter, QPen, QBrush, QColor, QPainterPath, QFont,
    QRadialGradient, QImage, QPixmap
)
from PyQt5.QtWidgets import (
    QApplication, QMainWindow, QWidget, QVBoxLayout,
    QHBoxLayout, QPushButton, QLabel, QRadioButton, QButtonGroup
)


# Initialize the MediaPipe hand-tracking helpers as module-level singletons.
mp_hands = mp.solutions.hands  # hands solution module (Hands class, connection constants)
mp_drawing = mp.solutions.drawing_utils  # MediaPipe's built-in landmark overlay renderer
HAND_CONNECTIONS = mp_hands.HAND_CONNECTIONS  # edge list of the 21-landmark hand skeleton


class HandCaptureWidget(QWidget):
    """Widget that shows the webcam feed and runs MediaPipe hand tracking.

    While ``recording`` is True, the 21 landmarks of every detected frame
    are appended to ``recorded_data``; ``stop_recording`` dumps them to a
    timestamped JSON file that ``HandReplayWidget`` can play back.
    """

    def __init__(self):
        super().__init__()
        self.cap = cv2.VideoCapture(0)
        self.hands = mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=1,  # track a single hand only (MediaPipe default is 2)
            min_detection_confidence=0.7,
            min_tracking_confidence=0.7
        )
        self.recording = False
        # List of frames; each frame is a list of 21 {'x','y','z'} dicts
        # with normalized [0, 1] coordinates.
        self.recorded_data = []
        self.display_mode = "full"  # "full" or "hand_only"

        # Poll the camera roughly every 30 ms (~33 fps upper bound).
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.update_frame)
        self.timer.start(30)

        self.setFixedSize(640, 480)
        self.image = None  # QPixmap of the latest processed frame
        self.current_landmarks = None  # MediaPipe landmarks of the latest frame, or None

    def set_display_mode(self, mode):
        """Switch rendering between "full" (camera + overlay) and "hand_only"."""
        self.display_mode = mode

    def update_frame(self):
        """Grab one camera frame, run hand detection, and refresh the widget."""
        ret, frame = self.cap.read()
        if not ret:
            return

        # MediaPipe expects RGB while OpenCV delivers BGR.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.hands.process(rgb_frame)

        self.current_landmarks = None
        if results.multi_hand_landmarks:
            # [0] selects the first (and, with max_num_hands=1, only) hand.
            hand_landmarks = results.multi_hand_landmarks[0]
            self.current_landmarks = hand_landmarks

            if self.recording:
                self.recorded_data.append([
                    {'x': lm.x, 'y': lm.y, 'z': lm.z}
                    for lm in hand_landmarks.landmark
                ])

            if self.display_mode == "full":
                # Use MediaPipe's built-in overlay renderer.
                mp_drawing.draw_landmarks(
                    rgb_frame,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(255, 200, 150), thickness=2, circle_radius=4),  # keypoints
                    mp_drawing.DrawingSpec(color=(180, 120, 80), thickness=2)  # connections
                )

        # Decide what finally gets shown for this frame.
        display_img = rgb_frame.copy()
        if self.display_mode == "hand_only":
            display_img = np.zeros_like(rgb_frame)
            # Explicit None check: protobuf messages should not be truth-tested.
            if self.current_landmarks is not None:
                self.draw_hand_on_image(display_img, self.current_landmarks)

        h, w, ch = display_img.shape
        bytes_per_line = ch * w
        self.image = self.rgb2qpixmap(display_img, w, h, bytes_per_line)
        self.update()

    def draw_hand_on_image(self, img, landmarks):
        """Draw the hand skeleton onto a plain black background image."""
        h, w = img.shape[:2]
        # Connection lines first so the keypoints render on top.
        for connection in HAND_CONNECTIONS:
            start_idx, end_idx = connection
            start = landmarks.landmark[start_idx]
            end = landmarks.landmark[end_idx]
            x1, y1 = int(start.x * w), int(start.y * h)
            x2, y2 = int(end.x * w), int(end.y * h)
            cv2.line(img, (x1, y1), (x2, y2), (255, 200, 150), 3)
        # Keypoints.
        for lm in landmarks.landmark:
            x, y = int(lm.x * w), int(lm.y * h)
            cv2.circle(img, (x, y), 6, (255, 255, 255), -1)

    def rgb2qpixmap(self, rgb_img, width, height, bytes_per_line):
        """Convert an RGB numpy image to a QPixmap.

        QImage wraps the numpy buffer without copying; QPixmap.fromImage
        makes its own copy, so the pixmap outlives the numpy array.
        """
        qimg = QImage(rgb_img.data, width, height, bytes_per_line, QImage.Format_RGB888)
        return QPixmap.fromImage(qimg)

    def paintEvent(self, event):
        if self.image:
            painter = QPainter(self)
            painter.drawPixmap(self.rect(), self.image)

    def start_recording(self):
        """Begin a fresh recording, discarding any previous capture buffer."""
        self.recording = True
        self.recorded_data = []

    def stop_recording(self):
        """Stop recording and save captured frames to a timestamped JSON file.

        Returns the filename, or None when nothing was captured.
        """
        self.recording = False
        if not self.recorded_data:
            return None
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"hand_gesture_{timestamp}.json"
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(self.recorded_data, f, indent=2)
        # Bug fix: the f-string previously had no placeholder, so the saved
        # filename was never reported.
        print(f"手势数据已保存至: {filename}")
        return filename

    def release(self):
        """Release the camera, the MediaPipe model, and stop the poll timer."""
        self.timer.stop()
        self.cap.release()
        self.hands.close()


class HandReplayWidget(QWidget):
    """Widget that replays a recorded gesture as a stylized virtual hand.

    Frames are the JSON lists saved by HandCaptureWidget: one list of 21
    {'x','y','z'} landmark dicts per frame, coordinates normalized to [0, 1].
    """

    def __init__(self):
        super().__init__()
        self.setFixedSize(640, 480)
        self.landmarks_sequence = []  # loaded frames (see class docstring)
        self.current_frame = 0  # index of the frame currently being drawn
        self.playing = False
        # Drives playback at 50 ms/frame (~20 fps); started on demand.
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.next_frame)
        self.connections = HAND_CONNECTIONS

    def load_latest_gesture(self):
        """Load the most recently created hand_gesture_*.json from the CWD.

        Returns True on success, False when no recording file exists.
        """
        json_files = [f for f in os.listdir('.') if f.startswith('hand_gesture_') and f.endswith('.json')]
        if not json_files:
            return False
        latest_file = max(json_files, key=os.path.getctime)
        with open(latest_file, 'r', encoding='utf-8') as f:
            self.landmarks_sequence = json.load(f)
        self.current_frame = 0
        return True

    def start_replay(self):
        """Reload the newest recording and play it from the first frame."""
        # Bug fix: always try to reload so a gesture recorded AFTER a previous
        # replay is picked up instead of the stale cached sequence. Fall back
        # to the cached sequence only when no file can be loaded.
        if not self.load_latest_gesture() and not self.landmarks_sequence:
            return
        self.playing = True
        self.current_frame = 0
        self.timer.start(50)

    def next_frame(self):
        """Advance playback by one frame; stop and reset when the end is reached."""
        if self.current_frame >= len(self.landmarks_sequence):
            self.timer.stop()
            self.playing = False
            self.current_frame = 0
            self.update()
            return
        self.update()
        self.current_frame += 1

    def paintEvent(self, event):
        """Render the current frame: palm, bones with shadow, joints, nails."""
        painter = QPainter(self)
        painter.setRenderHint(QPainter.Antialiasing)
        painter.setRenderHint(QPainter.SmoothPixmapTransform)

        # Dark backdrop.
        painter.fillRect(self.rect(), QColor(20, 20, 30))

        if not self.landmarks_sequence or self.current_frame >= len(self.landmarks_sequence):
            # Nothing to draw: show the idle hint instead.
            painter.setPen(QColor(200, 200, 200))
            font = QFont()
            font.setPointSize(16)
            painter.setFont(font)
            painter.drawText(self.rect(), Qt.AlignCenter, "点击“模拟”加载手势回放")
            return

        landmarks = self.landmarks_sequence[self.current_frame]
        w, h = self.width(), self.height()
        points = []
        for lm in landmarks:
            x = (1 - lm['x']) * w  # mirror x so the replay matches the mirrored camera view
            y = lm['y'] * h
            points.append(QPointF(x, y))

        if len(points) < 21:
            # Malformed frame: the MediaPipe hand model yields 21 landmarks.
            return

        # === Palm area (gradient fill) ===
        # Landmark indices outlining the palm: wrist plus the finger bases.
        palm_indices = [0, 1, 2, 5, 9, 13, 17, 0]
        palm_path = QPainterPath()
        palm_path.moveTo(points[0])
        for idx in palm_indices[1:]:
            palm_path.lineTo(points[idx])
        palm_path.closeSubpath()

        # Radial gradient that darkens away from the wrist.
        center = points[0]  # anchored at the wrist landmark
        gradient = QRadialGradient(center, 200)
        gradient.setColorAt(0, QColor(230, 190, 150))
        gradient.setColorAt(1, QColor(200, 160, 120))
        painter.fillPath(palm_path, QBrush(gradient))

        # === Finger bones (with drop shadow) ===
        # Shadow first: offset and semi-transparent.
        shadow_offset = QPointF(3, 3)
        painter.setPen(QPen(QColor(0, 0, 0, 80), 5))
        for connection in self.connections:
            start_idx, end_idx = connection
            if start_idx < len(points) and end_idx < len(points):
                p1 = points[start_idx] + shadow_offset
                p2 = points[end_idx] + shadow_offset
                painter.drawLine(p1, p2)

        # Then the bones themselves.
        painter.setPen(QPen(QColor(180, 120, 80), 4))
        for connection in self.connections:
            start_idx, end_idx = connection
            if start_idx < len(points) and end_idx < len(points):
                painter.drawLine(points[start_idx], points[end_idx])

        # === Joints (raised-knuckle look) ===
        painter.setPen(Qt.NoPen)
        for i, pt in enumerate(points):
            # Fingertips get a slightly lighter shade.
            joint_color = QColor(220, 170, 130)
            if i in (4, 8, 12, 16, 20):  # fingertip indices
                joint_color = QColor(240, 200, 170)
            painter.setBrush(QBrush(joint_color))
            painter.drawEllipse(pt, 7, 7)

        # === Nails (white ellipse on each fingertip) ===
        fingertips = [4, 8, 12, 16, 20]
        nail_color = QColor(255, 250, 245)
        for idx in fingertips:
            if idx < len(points):
                tip = points[idx]
                # Finger direction: from the previous joint toward the tip.
                prev_idx = idx - 1
                if prev_idx >= 0:
                    direction = tip - points[prev_idx]
                    length = (direction.x()**2 + direction.y()**2)**0.5
                    if length > 0:
                        dx = direction.x() / length
                        dy = direction.y() / length
                    else:
                        dx, dy = 0, -1
                else:
                    dx, dy = 0, -1

                # Rotate the painter so the nail follows the finger direction.
                painter.save()
                painter.translate(tip.x(), tip.y())
                angle = np.degrees(np.arctan2(dy, dx))
                painter.rotate(angle)

                # Elliptical nail.
                nail_rect = QRectF(-6, -3, 12, 6)
                painter.setBrush(QBrush(nail_color))
                painter.setPen(QPen(QColor(230, 220, 210), 1))
                painter.drawEllipse(nail_rect)
                painter.restore()


class MainWindow(QMainWindow):
    """Top-level window wiring the capture widget to the replay widget."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("手势捕获与回放系统")
        self.setGeometry(100, 100, 1320, 600)

        # The two display panes: live camera on the left, replay on the right.
        self.capture_widget = HandCaptureWidget()
        self.replay_widget = HandReplayWidget()

        self._create_controls()
        self._wire_signals()
        self._assemble_layout()

    def _create_controls(self):
        """Build the action buttons and the display-mode radio group."""
        self.btn_record = QPushButton("录制")
        self.btn_stop = QPushButton("停止录制")
        self.btn_replay = QPushButton("模拟")
        for button in (self.btn_record, self.btn_stop, self.btn_replay):
            button.setStyleSheet("font-size: 14px; padding: 6px;")

        self.radio_full = QRadioButton("完整画面")
        self.radio_hand_only = QRadioButton("仅手部")
        self.radio_full.setChecked(True)
        self.radio_group = QButtonGroup()
        self.radio_group.addButton(self.radio_full)
        self.radio_group.addButton(self.radio_hand_only)

    def _wire_signals(self):
        """Connect user actions to their handlers."""
        self.radio_full.toggled.connect(self.on_radio_toggled)
        self.btn_record.clicked.connect(self.start_recording)
        self.btn_stop.clicked.connect(self.stop_recording)
        self.btn_replay.clicked.connect(self.replay_latest)

    def _assemble_layout(self):
        """Lay out the header row, the two display panes, and the control bar."""
        header = QHBoxLayout()
        for widget in (QLabel("摄像头画面"), self.radio_full, self.radio_hand_only):
            header.addWidget(widget)

        panes = QHBoxLayout()
        panes.addWidget(self.capture_widget)
        panes.addWidget(self.replay_widget)

        controls = QHBoxLayout()
        for button in (self.btn_record, self.btn_stop, self.btn_replay):
            controls.addWidget(button)

        root = QVBoxLayout()
        root.addLayout(header)
        root.addLayout(panes)
        root.addLayout(controls)

        container = QWidget()
        container.setLayout(root)
        self.setCentralWidget(container)

    def on_radio_toggled(self):
        """Propagate the selected radio button to the capture widget."""
        mode = "full" if self.radio_full.isChecked() else "hand_only"
        self.capture_widget.set_display_mode(mode)

    def start_recording(self):
        """Start capturing landmarks on the camera pane."""
        self.capture_widget.start_recording()
        print("开始录制手势...")

    def stop_recording(self):
        """Stop capturing; report only when a file was actually written."""
        if self.capture_widget.stop_recording():
            print("录制已停止并保存。")

    def replay_latest(self):
        """Play back the most recently saved gesture on the replay pane."""
        self.replay_widget.start_replay()

    def closeEvent(self, event):
        """Release camera/model resources before the window closes."""
        self.capture_widget.release()
        event.accept()


def main():
    """Create the Qt application and run the event loop until exit."""
    app = QApplication(sys.argv)
    window = MainWindow()
    window.show()
    sys.exit(app.exec_())


if __name__ == "__main__":
    main()


基本依赖:mediapipe, numpy, opencv-python, PyQt5

效果

左侧是摄像头画面,右侧是回放画面。摄像头画面我写了2个模式,上图展示的是仅显示手部关键点和连线的模式,如果想要看到摄像头完整的画面,也就是你的真实所在背景和手部,RadioButton选择第一个选项即可。

可以看到,右侧可以很准确地回放左侧录制的动作。因为摄像头画面本身是镜像的,我录制用的是左手,所以回放也使用了镜像,如果不想用镜像,将回放组件中paintEvent函数下的 x = (1 - lm['x']) * w 改为 x = lm['x'] * w 即可。

更多

这里只是简单的展示mediapipe库的用法,这个库除了手势识别还有更多的模块,实际上,就单单是手势识别这一个模块就可以做非常多的有意思的项目。我举几个例子,比如虚拟绘画,可以在摄像头画面上进行写字和绘画,只需要识别手势一些点,利用点的路径作为绘画和写字路径即可,不同的手势还可以设计成不同的功能,比如不同种类的笔和橡皮擦,或者直接写一个工具栏,手势进行选择;比如虚拟鼠标,可以用手势中某个点代替鼠标,对电脑进行操作,比如拇指食指捏一下就是左击等,检测特定的捏一下之类的动作只需要判断拇指食指最顶部点的距离即可;比如剪刀石头布游戏,可以和电脑进行实时的剪刀石头布比赛,同样,剪刀,石头和布的判断就根据点之间的关系判断即可,同理还可以设计脱离键盘鼠标,只用手势控制玩其他电脑游戏;还有虚拟键盘等等,可以看到,发挥想象力,单单手势识别模块就可以做非常多的有趣的项目,且原理其实都差不多,上面提到的几个项目其实我基本都写过,都不复杂。