MediaPipe Examples

This document provides examples of MediaPipe's face detection, face mesh, hand detection, pose detection, holistic detection, background (selfie) segmentation, and 3D object detection (Objectron) features.
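
All of the examples assume Python 3 with the mediapipe and opencv-python packages available. A typical setup (package names as published on PyPI; your platform may ship its own MediaPipe build) is:

python3 -m pip install mediapipe opencv-python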

Face Detection

This example captures video from the camera, detects faces, and draws bounding boxes around them.

To run this example:

  1. Create a new file named face_detection.py
  2. Copy the following code into face_detection.py
  3. Run the example with python3 face_detection.py
  4. Click on the preview window and press 'q' to quit
face_detection.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import cv2
import mediapipe as mp

mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils

cap = cv2.VideoCapture(0)
with mp_face_detection.FaceDetection(
    model_selection=0, min_detection_confidence=0.5) as face_detection:
  while cap.isOpened():
    success, image = cap.read()
    if not success:
      print("Ignoring empty camera frame.")
      # If loading a video, use 'break' instead of 'continue'.
      continue

    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = face_detection.process(image)

    # Draw the face detection annotations on the image.
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    if results.detections:
      for detection in results.detections:
        mp_drawing.draw_detection(image, detection)
    # Flip the image horizontally for a selfie-view display.
    cv2.imshow('MediaPipe Face Detection', cv2.flip(image, 1))
    if cv2.waitKey(1) & 0xFF == ord('q'):
      break

cap.release()
cv2.destroyAllWindows()
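
Beyond drawing, each entry of results.detections exposes its geometry directly. The sketch below (the helper name to_pixel_bbox is illustrative, not part of MediaPipe) converts a detection's normalized relative bounding box into pixel coordinates:

# Sketch: convert a detection's normalized bounding box into pixel coordinates.
# 'detection' is one entry of results.detections; 'image' is the BGR frame.
def to_pixel_bbox(detection, image):
  h, w, _ = image.shape
  box = detection.location_data.relative_bounding_box  # values in [0, 1]
  return (int(box.xmin * w), int(box.ymin * h),
          int(box.width * w), int(box.height * h))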

Preview:

Face Mesh

This example captures video from the camera, detects faces, and applies a mesh overlay on detected faces.

To run this example:

  1. Create a new file named face_mesh.py
  2. Copy the following code into face_mesh.py
  3. Run the example with python3 face_mesh.py
  4. Click on the preview window and press 'q' to quit
face_mesh.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import cv2
import mediapipe as mp

mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_face_mesh = mp.solutions.face_mesh

drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
cap = cv2.VideoCapture(0)
with mp_face_mesh.FaceMesh(
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as face_mesh:
  while cap.isOpened():
    success, image = cap.read()
    if not success:
      print("Ignoring empty camera frame.")
      # If loading a video, use 'break' instead of 'continue'.
      continue

    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = face_mesh.process(image)

    # Draw the face mesh annotations on the image.
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    if results.multi_face_landmarks:
      for face_landmarks in results.multi_face_landmarks:
        mp_drawing.draw_landmarks(
            image=image,
            landmark_list=face_landmarks,
            connections=mp_face_mesh.FACEMESH_TESSELATION,
            landmark_drawing_spec=None,
            connection_drawing_spec=mp_drawing_styles
            .get_default_face_mesh_tesselation_style())
        mp_drawing.draw_landmarks(
            image=image,
            landmark_list=face_landmarks,
            connections=mp_face_mesh.FACEMESH_CONTOURS,
            landmark_drawing_spec=None,
            connection_drawing_spec=mp_drawing_styles
            .get_default_face_mesh_contours_style())
        mp_drawing.draw_landmarks(
            image=image,
            landmark_list=face_landmarks,
            connections=mp_face_mesh.FACEMESH_IRISES,
            landmark_drawing_spec=None,
            connection_drawing_spec=mp_drawing_styles
            .get_default_face_mesh_iris_connections_style())
    # Flip the image horizontally for a selfie-view display.
    cv2.imshow('MediaPipe Face Mesh', cv2.flip(image, 1))
    if cv2.waitKey(1) & 0xFF == ord('q'):
      break

cap.release()
cv2.destroyAllWindows()
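
Each entry of results.multi_face_landmarks holds 468 landmarks (478 with refine_landmarks=True, which adds the iris points), each with normalized x, y and a relative depth z. A minimal sketch that reads the nose tip (index 1 in the canonical face mesh):

# Sketch: read a single landmark from a face mesh result.
# 'face_landmarks' is one entry of results.multi_face_landmarks.
h, w, _ = image.shape
nose_tip = face_landmarks.landmark[1]  # index 1 is the nose tip
px, py = int(nose_tip.x * w), int(nose_tip.y * h)
print(f"nose tip at ({px}, {py}), relative depth z={nose_tip.z:.3f}")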

Preview:

Hand Detection

This example captures video from the camera, detects hands, and draws the hand skeleton on each detected hand.

To run this example:

  1. Create a new file named hand_detection.py
  2. Copy the code into hand_detection.py
  3. Run the example with python3 hand_detection.py
  4. Click on the preview window and press 'q' to quit
hand_detection.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import cv2
import mediapipe as mp

mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

cap = cv2.VideoCapture(0)
with mp_hands.Hands(
    model_complexity=0,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as hands:
  while cap.isOpened():
    success, image = cap.read()
    if not success:
      print("Ignoring empty camera frame.")
      # If loading a video, use 'break' instead of 'continue'.
      continue

    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(image)

    # Draw the hand annotations on the image.
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    if results.multi_hand_landmarks:
      for hand_landmarks in results.multi_hand_landmarks:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_hands.HAND_CONNECTIONS,
            mp_drawing_styles.get_default_hand_landmarks_style(),
            mp_drawing_styles.get_default_hand_connections_style())
    # Flip the image horizontally for a selfie-view display.
    cv2.imshow('MediaPipe Hands', cv2.flip(image, 1))
    if cv2.waitKey(1) & 0xFF == ord('q'):
      break

cap.release()
cv2.destroyAllWindows()
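
Alongside the landmarks, results.multi_handedness reports whether each hand is left or right, and the mp_hands.HandLandmark enum names the 21 landmark indices. A sketch that pairs each hand with its label and prints the index fingertip position:

# Sketch: pair each hand's landmarks with its handedness label.
# Note: handedness assumes a mirrored (selfie) input image.
if results.multi_hand_landmarks:
  for hand_landmarks, handedness in zip(results.multi_hand_landmarks,
                                        results.multi_handedness):
    label = handedness.classification[0].label  # 'Left' or 'Right'
    tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
    print(f"{label} index fingertip: x={tip.x:.2f}, y={tip.y:.2f}")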

Preview:

Pose Detection

This example captures video from the camera, detects the human body, and draws the skeleton on the detected body.

To run this example:

  1. Create a new file named pose_detection.py
  2. Copy the following code into pose_detection.py
  3. Run the example with python3 pose_detection.py
  4. Click on the preview window and press 'q' to quit
pose_detection.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import cv2
import mediapipe as mp

mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose

cap = cv2.VideoCapture(0)
with mp_pose.Pose(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as pose:
  while cap.isOpened():
    success, image = cap.read()
    if not success:
      print("Ignoring empty camera frame.")
      # If loading a video, use 'break' instead of 'continue'.
      continue

    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pose.process(image)

    # Draw the pose annotation on the image.
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    mp_drawing.draw_landmarks(
        image,
        results.pose_landmarks,
        mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
    # Flip the image horizontally for a selfie-view display.
    cv2.imshow('MediaPipe Pose', cv2.flip(image, 1))
    if cv2.waitKey(1) & 0xFF == ord('q'):
      break

cap.release()
cv2.destroyAllWindows()
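
Each of the 33 pose landmarks also carries a visibility score, and the mp_pose.PoseLandmark enum names the indices. A sketch (the angle helper is illustrative, not part of MediaPipe) that computes the left elbow angle from the shoulder, elbow, and wrist landmarks:

import math

# Sketch: angle at point b (in degrees) from 2D landmark coordinates.
def angle(a, b, c):
  ang = math.degrees(math.atan2(c.y - b.y, c.x - b.x)
                     - math.atan2(a.y - b.y, a.x - b.x))
  return abs(ang) if abs(ang) <= 180 else 360 - abs(ang)

if results.pose_landmarks:
  lm = results.pose_landmarks.landmark
  elbow = angle(lm[mp_pose.PoseLandmark.LEFT_SHOULDER],
                lm[mp_pose.PoseLandmark.LEFT_ELBOW],
                lm[mp_pose.PoseLandmark.LEFT_WRIST])
  print(f"left elbow angle: {elbow:.0f} degrees")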

Preview:

Holistic Detection

This example captures video from the camera, runs holistic detection, and draws the face mesh contours and body skeleton on the detected person.

To run this example:

  1. Create a new file named holistic_detection.py
  2. Copy the code into holistic_detection.py
  3. Run the example with python3 holistic_detection.py
  4. Click on the preview window and press 'q' to quit
holistic_detection.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import cv2
import mediapipe as mp

mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_holistic = mp.solutions.holistic

cap = cv2.VideoCapture(0)
with mp_holistic.Holistic(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as holistic:
  while cap.isOpened():
    success, image = cap.read()
    if not success:
      print("Ignoring empty camera frame.")
      # If loading a video, use 'break' instead of 'continue'.
      continue

    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = holistic.process(image)

    # Draw landmark annotation on the image.
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    mp_drawing.draw_landmarks(
        image,
        results.face_landmarks,
        mp_holistic.FACEMESH_CONTOURS,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles
        .get_default_face_mesh_contours_style())
    mp_drawing.draw_landmarks(
        image,
        results.pose_landmarks,
        mp_holistic.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing_styles
        .get_default_pose_landmarks_style())
    # Flip the image horizontally for a selfie-view display.
    cv2.imshow('MediaPipe Holistic', cv2.flip(image, 1))
    if cv2.waitKey(1) & 0xFF == ord('q'):
      break

cap.release()
cv2.destroyAllWindows()
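
Holistic results also expose hand landmarks through results.left_hand_landmarks and results.right_hand_landmarks; the example above draws only the face and pose, but the hands can be added the same way:

# Sketch: draw both hands from a holistic result (either may be None,
# which draw_landmarks silently skips).
for hand_landmarks in (results.left_hand_landmarks,
                       results.right_hand_landmarks):
  mp_drawing.draw_landmarks(
      image,
      hand_landmarks,
      mp_holistic.HAND_CONNECTIONS,
      mp_drawing_styles.get_default_hand_landmarks_style(),
      mp_drawing_styles.get_default_hand_connections_style())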

Preview:

Background Segmentation

This example captures video from the camera, segments the person from the background, and replaces the background with a solid color.

To run this example:

  1. Create a new file named back_seg.py
  2. Copy the code into back_seg.py
  3. Run the example with python3 back_seg.py
  4. Click on the preview window and press 'q' to quit
back_seg.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import cv2
import mediapipe as mp
import numpy as np

mp_drawing = mp.solutions.drawing_utils
mp_selfie_segmentation = mp.solutions.selfie_segmentation

BG_COLOR = (192, 192, 192)  # gray
cap = cv2.VideoCapture(0)
with mp_selfie_segmentation.SelfieSegmentation(
    model_selection=1) as selfie_segmentation:
  bg_image = None
  while cap.isOpened():
    success, image = cap.read()
    if not success:
      print("Ignoring empty camera frame.")
      # If loading a video, use 'break' instead of 'continue'.
      continue

    # Flip the image horizontally for a later selfie-view display, and
    # convert the BGR image to RGB.
    image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
    # To improve performance, optionally mark the image as not writeable to
    # pass by reference.
    image.flags.writeable = False
    results = selfie_segmentation.process(image)

    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Draw selfie segmentation on the background image.
    # To improve segmentation around boundaries, consider applying a joint
    # bilateral filter to "results.segmentation_mask" with "image".
    condition = np.stack(
        (results.segmentation_mask,) * 3, axis=-1) > 0.1
    # The background can be customized:
    #   a) Load an image (with the same width and height as the input image)
    #      to be the background, e.g., bg_image = cv2.imread('/path/to/image/file')
    #   b) Blur the input image by applying image filtering, e.g.,
    #      bg_image = cv2.GaussianBlur(image, (55, 55), 0)
    if bg_image is None:
      bg_image = np.zeros(image.shape, dtype=np.uint8)
      bg_image[:] = BG_COLOR
    output_image = np.where(condition, image, bg_image)

    cv2.imshow('MediaPipe Selfie Segmentation', output_image)
    if cv2.waitKey(1) & 0xFF == ord('q'):
      break

cap.release()
cv2.destroyAllWindows()
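
As the comments in the code point out, the background need not be a solid color. A minimal sketch that blurs the live frame instead (recomputed every frame, replacing the bg_image block above):

# Sketch: use a blurred copy of the current frame as the background.
bg_image = cv2.GaussianBlur(image, (55, 55), 0)
output_image = np.where(condition, image, bg_image)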

Preview:

3D Object Detection

This example captures video from the camera, performs 3D object detection (four models are supported: shoe, chair, cup, and camera), and draws 3D bounding boxes around the detected objects.

To run this example:

  1. Create a new file named 3d_object_detection.py
  2. Copy the following code into 3d_object_detection.py
  3. Run the example with python3 3d_object_detection.py
  4. Click on the preview window; press 'f' to cycle through the models and 'q' to quit
3d_object_detection.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import cv2
import mediapipe as mp

# Initialize MediaPipe components.
mp_drawing = mp.solutions.drawing_utils
mp_objectron = mp.solutions.objectron

# Available models.
MODELS = ['Shoe', 'Chair', 'Cup', 'Camera']
current_model = 0  # Start with the first model.

# Initialize video capture.
cap = cv2.VideoCapture(0)

# Create the initial Objectron instance.
objectron = mp_objectron.Objectron(
    static_image_mode=False,
    max_num_objects=5,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.99,
    model_name=MODELS[current_model])
while cap.isOpened():
  success, image = cap.read()
  if not success:
    print("Ignoring empty camera frame.")
    continue

  # Process the image.
  image.flags.writeable = False
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  results = objectron.process(image)

  # Draw the results.
  image.flags.writeable = True
  image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

  if results.detected_objects:
    for detected_object in results.detected_objects:
      mp_drawing.draw_landmarks(
          image, detected_object.landmarks_2d, mp_objectron.BOX_CONNECTIONS)
      mp_drawing.draw_axis(image, detected_object.rotation,
                           detected_object.translation)

  # Optionally flip the image horizontally for a selfie-view display.
  # Note that flipping here would also mirror the text drawn below.
  # image = cv2.flip(image, 1)

  # Overlay the name of the current model in the top-right corner.
  text_x = image.shape[1] - 200
  cv2.putText(image, f"Model: {MODELS[current_model]}", (text_x, 30),
              cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

  # Show the final view.
  cv2.imshow('MediaPipe Objectron', image)

  # Key handling.
  key = cv2.waitKey(5) & 0xFF
  if key == 27 or key == ord('q'):  # ESC or 'q' to quit.
    break
  elif key == ord('f'):  # 'f' to switch to the next model.
    current_model = (current_model + 1) % len(MODELS)
    objectron = mp_objectron.Objectron(
        static_image_mode=False,
        max_num_objects=5,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.99,
        model_name=MODELS[current_model])
    print(f"Switched to model: {MODELS[current_model]}")

cap.release()
cv2.destroyAllWindows()
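
Besides the 2D box that is drawn, each detected_object carries its 3D geometry: landmarks_3d (the nine box vertices), plus the rotation matrix and translation vector already passed to draw_axis. A sketch that prints each object's position in the camera coordinate frame:

# Sketch: inspect the 3D pose of each detected object.
if results.detected_objects:
  for detected_object in results.detected_objects:
    tx, ty, tz = detected_object.translation  # camera coordinate frame
    print(f"object center at x={tx:.2f}, y={ty:.2f}, z={tz:.2f}")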

Preview: