佛山做网站有哪几家,阿里云建设网站教学,网站百度收录查询,平面设计和网站运营引言
在现代计算机视觉中，面部检测和姿势识别是一个重要的领域，它在各种应用中发挥着关键作用，包括人脸解锁、表情识别、虚拟现实等。本文将深入探讨一个使用Python编写的应用程序，该应用程序结合了多个库和技术，用于…引言
在现代计算机视觉中，面部检测和姿势识别是一个重要的领域，它在各种应用中发挥着关键作用，包括人脸解锁、表情识别、虚拟现实等。本文将深入探讨一个使用Python编写的应用程序，该应用程序结合了多个库和技术，用于面部检测和姿势识别。

文章目录：引言｜面部检测（dlib库、OpenCV库）｜Retinaface-FaceNet实现人脸识别｜眨眼检测｜嘴部动作检测｜头部姿势检测｜完整代码｜结尾与未来展望（下一步计划）

面部检测
面部检测是任何面部识别任务的基础。在本应用程序中，我们使用了两个主要库来进行面部检测：dlib、OpenCV。
dlib库
dlib库是一个功能强大的面部检测和特征标定工具。它提供了用于检测人脸及面部特征的算法。在本应用程序中，dlib用于检测人脸的位置和特征点。 相关文章：《dlib库的跨平台安装》、《全面横扫：dlib Python API在Linux和Windows的配置方案》、《【香橙派-OpenCV-Torch-dlib】TF损坏变成RAW格式解决方案及python环境配置》
import dlib

OpenCV库
OpenCV是一个广泛用于图像处理和计算机视觉任务的库。在本应用程序中，OpenCV用于图像处理、显示和保存。
import cv2
import numpy as np

Retinaface-FaceNet实现人脸识别
代码基于人工智能领域大佬Bubbliiiing的《聪明的人脸识别4——Pytorch 利用Retinaface+Facenet搭建人脸识别平台》微调。
Retinaface+FaceNet人脸识别系统-Gradio界面设计 github: Face-recognition-web-ui
retinaface_new.py
import time

import cv2
import numpy as np
import torch
import torch.nn as nn
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm

from nets.facenet import Facenet
from nets_retinaface.retinaface import RetinaFace
from utils.anchors import Anchors
from utils.config import cfg_mnet, cfg_re50
from utils.utils import (Alignment_1, compare_faces, letterbox_image,
                         preprocess_input)
from utils.utils_bbox import (decode, decode_landm, non_max_suppression,
                              retinaface_correct_boxes)


# --------------------------------------#
#   cv2.putText cannot render Chinese glyphs, so the label is drawn
#   through PIL instead.
# --------------------------------------#
def cv2ImgAddText(img, label, left, top, textColor=(255, 255, 255)):
    """Draw ``label`` on ``img`` using a TrueType font and return the result.

    Args:
        img: Image as an array-like that ``np.uint8`` can convert.
        label: Text to draw. Any object is accepted; it is converted with
            ``str()`` so a non-string label does not raise.
        left: X coordinate of the text's top-left corner.
        top: Y coordinate of the text's top-left corner.
        textColor: Fill colour passed to PIL; defaults to white.

    Returns:
        A new ``numpy`` array holding the image with the text drawn on it.
    """
    pil_img = Image.fromarray(np.uint8(img))
    font = ImageFont.truetype(font='model_data/simhei.ttf', size=20)
    draw = ImageDraw.Draw(pil_img)
    # The original encoded the label to UTF-8 bytes and decoded it straight
    # back, which is a no-op for str and fails for anything without
    # ``.encode``; str() covers both cases.
    draw.text((left, top), str(label), fill=textColor, font=font)
    # np.array makes an owned copy so callers can keep drawing on the result
    # with cv2. NOTE(review): np.asarray on a PIL image has been read-only in
    # some Pillow/NumPy combinations -- confirm against the versions in use.
    return np.array(pil_img)


# --------------------------------------#
# 一定注意backbone和model_path的对应。
# 在更换facenet_model后
# 一定要注意重新编码人脸。
# --------------------------------------#
class Retinaface(object):
    """RetinaFace detector combined with a FaceNet encoder.

    Detects faces, encodes each one with FaceNet, and compares the encoding
    with the known-face database stored under ``model_data/``.

    Keep ``*_backbone`` and ``*_model_path`` consistent with each other, and
    re-encode the face database after changing the FaceNet model.
    """

    _defaults = {
        # Path of the trained RetinaFace weights.
        "retinaface_model_path": 'model_data/Retinaface_mobilenet0.25.pth',
        # RetinaFace backbone: "mobilenet" or "resnet50".
        "retinaface_backbone": "mobilenet",
        # Only boxes scoring above this confidence are kept.
        "confidence": 0.5,
        # IoU threshold intended for non-maximum suppression.
        # NOTE(review): not forwarded to non_max_suppression below (as in the
        # original), so that helper's own default applies -- confirm.
        "nms_iou": 0.3,
        # Network input size used when letterbox_image is enabled. Smaller is
        # faster. Should be a multiple of 32, e.g. [640, 640, 3].
        "retinaface_input_shape": [640, 640, 3],
        # Whether to letterbox the input to retinaface_input_shape; when
        # False the original image size is used for prediction.
        "letterbox_image": True,
        # Path of the trained FaceNet weights.
        "facenet_model_path": 'model_data/facenet_mobilenet.pth',
        # FaceNet backbone: "mobilenet" or "inception_resnetv1".
        "facenet_backbone": "mobilenet",
        # FaceNet input size.
        "facenet_input_shape": [160, 160, 3],
        # Face-distance threshold used for matching (attribute name kept,
        # typo included, for backward compatibility).
        "facenet_threhold": 0.9,
        # Whether to use CUDA. Set to False on machines without a GPU.
        "cuda": True,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of option ``n`` or an error string."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    def __init__(self, encoding=0, **kwargs):
        """Build both networks and load the known-face database.

        Args:
            encoding: Truthy when the instance is created only to encode the
                database; suppresses the "database missing" message.
            **kwargs: Overrides for any key of ``_defaults``.
        """
        self.__dict__.update(self._defaults)
        for name, value in kwargs.items():
            setattr(self, name, value)

        # Backbone-specific configuration.
        if self.retinaface_backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50

        # Pre-compute the anchors for the fixed (letterboxed) input size.
        self.anchors = Anchors(
            self.cfg,
            image_size=(self.retinaface_input_shape[0], self.retinaface_input_shape[1]),
        ).get_anchors()
        self.generate()

        try:
            self.known_face_encodings = np.load(
                "model_data/{backbone}_face_encoding.npy".format(backbone=self.facenet_backbone))
            self.known_face_names = np.load(
                "model_data/{backbone}_names.npy".format(backbone=self.facenet_backbone))
        except (OSError, ValueError):
            # Deliberately best-effort: the database does not exist yet when
            # the instance is created to build it (encoding truthy).
            if not encoding:
                print("载入已有人脸特征失败，请检查model_data下面是否生成了相关的人脸特征文件。")

    def generate(self):
        """Create both networks and load their weights."""
        self.net = RetinaFace(cfg=self.cfg, phase='eval', pre_train=False).eval()
        self.facenet = Facenet(backbone=self.facenet_backbone, mode="predict").eval()

        print('Loading weights into state dict...')
        # map_location lets a checkpoint saved on a GPU load on a CPU-only
        # machine when cuda=False.
        device = torch.device('cuda' if self.cuda else 'cpu')
        state_dict = torch.load(self.retinaface_model_path, map_location=device)
        self.net.load_state_dict(state_dict)

        state_dict = torch.load(self.facenet_model_path, map_location=device)
        self.facenet.load_state_dict(state_dict, strict=False)

        if self.cuda:
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

            self.facenet = nn.DataParallel(self.facenet)
            self.facenet = self.facenet.cuda()
        print('Finished!')

    def _detect_faces(self, image):
        """Run RetinaFace on a float32 HxWx3 image.

        Returns the output of ``non_max_suppression`` rescaled to pixel
        coordinates of ``image``: columns 0-3 are the box, 4 the score and
        5-14 the five landmarks. The result is empty when nothing is found.
        """
        im_height, im_width, _ = np.shape(image)
        # Factors that map normalised predictions back to pixels.
        scale = [im_width, im_height] * 2
        scale_for_landmarks = [im_width, im_height] * 5

        if self.letterbox_image:
            image = letterbox_image(
                image, [self.retinaface_input_shape[1], self.retinaface_input_shape[0]])
            anchors = self.anchors
        else:
            anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

        with torch.no_grad():
            image = torch.from_numpy(
                preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0).type(torch.FloatTensor)
            if self.cuda:
                image = image.cuda()
                anchors = anchors.cuda()

            loc, conf, landms = self.net(image)
            boxes = decode(loc.data.squeeze(0), anchors, self.cfg['variance'])
            conf = conf.data.squeeze(0)[:, 1:2]
            landms = decode_landm(landms.data.squeeze(0), anchors, self.cfg['variance'])

            boxes_conf_landms = torch.cat([boxes, conf, landms], -1)
            boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)

        if len(boxes_conf_landms) <= 0:
            return boxes_conf_landms

        # Remove the letterbox padding from the predictions.
        if self.letterbox_image:
            boxes_conf_landms = retinaface_correct_boxes(
                boxes_conf_landms,
                np.array([self.retinaface_input_shape[0], self.retinaface_input_shape[1]]),
                np.array([im_height, im_width]))

        boxes_conf_landms[:, :4] = boxes_conf_landms[:, :4] * scale
        boxes_conf_landms[:, 5:] = boxes_conf_landms[:, 5:] * scale_for_landmarks
        return boxes_conf_landms

    def _encode_face(self, image, detection):
        """Crop, align and FaceNet-encode one detected face of ``image``."""
        # Clamp to the image so negative coordinates cannot wrap the slice.
        detection = np.maximum(detection, 0)
        x1, y1, x2, y2 = (int(v) for v in detection[:4])
        crop_img = np.array(image)[y1:y2, x1:x2]
        landmark = np.reshape(detection[5:], (5, 2)) - np.array([x1, y1])
        crop_img, _ = Alignment_1(crop_img, landmark)

        crop_img = np.array(letterbox_image(
            np.uint8(crop_img),
            (self.facenet_input_shape[1], self.facenet_input_shape[0]))) / 255
        crop_img = np.expand_dims(crop_img.transpose(2, 0, 1), 0)
        with torch.no_grad():
            crop_img = torch.from_numpy(crop_img).type(torch.FloatTensor)
            if self.cuda:
                crop_img = crop_img.cuda()
            return self.facenet(crop_img)[0].cpu().numpy()

    def encode_face_dataset(self, image_paths, names):
        """Encode the largest face of every image and save the database.

        Images without a detected face are reported and skipped. Only the
        names of images that were actually encoded are saved, so names and
        encodings stay aligned index by index.
        """
        face_encodings = []
        kept_names = []
        for index, path in enumerate(tqdm(image_paths)):
            with Image.open(path) as pil_image:
                # Force 3 channels so grayscale/RGBA files do not break the
                # (height, width, channels) unpacking downstream.
                image = np.array(pil_image.convert('RGB'), np.float32)
            old_image = image.copy()

            detections = self._detect_faces(image)
            if len(detections) <= 0:
                print(names[index], "：未检测到人脸")
                continue

            # Keep the face with the largest bounding box.
            best_face_location = None
            biggest_area = 0
            for result in detections:
                left, top, right, bottom = result[0:4]
                area = (right - left) * (bottom - top)
                if area > biggest_area:
                    biggest_area = area
                    best_face_location = result
            if best_face_location is None:
                print(names[index], "：未检测到人脸")
                continue

            face_encodings.append(self._encode_face(old_image, best_face_location))
            kept_names.append(names[index])

        np.save("model_data/{backbone}_face_encoding.npy".format(backbone=self.facenet_backbone),
                face_encodings)
        np.save("model_data/{backbone}_names.npy".format(backbone=self.facenet_backbone),
                kept_names)

    def live_detect_image(self, image, flag):
        """Detect, recognise and annotate the faces in ``image``.

        Args:
            image: Image array; a copy is annotated and returned.
            flag: When 0, every recognised name is replaced by ``False``.

        Returns:
            ``(annotated_image, name)`` where ``name`` is the result for the
            first detected face, or ``False`` when no face was found.
        """
        old_image = image.copy()
        image = np.array(image, np.float32)

        detections = self._detect_faces(image)
        if len(detections) <= 0:
            return old_image, False

        face_encodings = [self._encode_face(old_image, det) for det in detections]

        face_names = []
        for face_encoding in face_encodings:
            matches, face_distances = compare_faces(
                self.known_face_encodings, face_encoding, tolerance=self.facenet_threhold)
            name = "Unknown"
            # Index of the closest known face.
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = self.known_face_names[best_match_index]
            # NOTE(review): the nesting of this check was lost in the garbled
            # source; it is restored here as applying to every face.
            if flag == 0:
                name = False
            face_names.append(name)

        for i, b in enumerate(detections):
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            # b[0]-b[3] are the box corners, b[4] is the score.
            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

            # b[5]-b[14] are the five landmark coordinates.
            cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
            cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
            cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
            cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)

            name = face_names[i]
            # cv2 cannot draw Chinese text, so the PIL-based helper is used;
            # it is slower. A suppressed name (False) is not drawn at all.
            if name is not False:
                # np.array keeps the frame writable for the next iteration.
                old_image = np.array(
                    cv2ImgAddText(old_image, str(name), b[0] + 5, b[3] - 25))

        if face_names:
            return old_image, face_names[0]
        else:
            return old_image, False


# ---- Article section heading: 眨眼检测 (blink detection) ----
眨眼检测是本应用程序的一个重要功能。我们使用了眨眼检测算法来监测眨眼动作。在BlinkDetection类中，眨眼的EAR（眼睛纵横比）阈值被设置为0.2。当EAR低于这个阈值时，认为用户眨了眼睛。
class BlinkDetection:
    """Count blinks from 68-point facial landmarks using the eye aspect ratio."""

    def __init__(self):
        self.ear = None
        self.status = None
        self.frame_counter = 0      # consecutive frames with closed eyes
        self.blink_counter = 0      # completed blinks
        self.EAR_THRESHOLD = 0.2    # eyes count as closed below this EAR
        self.CONSEC_FRAMES = 2      # closed frames needed to count a blink

    def eye_aspect_ratio(self, eye):
        """Return the EAR of one eye given its six landmark points.

        The ratio is the sum of the two vertical distances divided by twice
        the horizontal distance.
        """
        A = np.linalg.norm(eye[1] - eye[5])
        B = np.linalg.norm(eye[2] - eye[4])
        C = np.linalg.norm(eye[0] - eye[3])
        return (A + B) / (2.0 * C)

    def detect(self, landmarks):
        """Update the blink state from one frame of landmarks.

        Args:
            landmarks: Indexable array of 68 (x, y) points; rows 36-41 are
                used as the left eye and rows 42-47 as the right eye.

        Returns:
            ``(blink_counter, status, ear)`` where ``status`` is
            ``"Blinking"`` or ``"Open"``.
        """
        left_eye = landmarks[36:42]
        right_eye = landmarks[42:48]

        ear_left = self.eye_aspect_ratio(left_eye)
        ear_right = self.eye_aspect_ratio(right_eye)
        self.ear = (ear_left + ear_right) / 2.0

        # The configured threshold is used here; the original hard-coded 0.21
        # and left EAR_THRESHOLD unused.
        if self.ear < self.EAR_THRESHOLD:
            self.frame_counter += 1
            self.status = "Blinking"
        else:
            # A blink is counted once the eyes reopen after staying closed
            # for at least CONSEC_FRAMES frames.
            if self.frame_counter >= self.CONSEC_FRAMES:
                self.blink_counter += 1
            self.frame_counter = 0
            self.status = "Open"

        return self.blink_counter, self.status, self.ear


# ---- Article section heading: 嘴部动作检测 (mouth movement detection) ----
嘴部动作检测用于监测用户是否张嘴。在MouthDetection类中，我们计算了嘴巴的MAR（嘴巴纵横比），并将其与阈值0.5进行比较。当MAR大于0.5时，表示用户张嘴。
class MouthDetection:
    """Count mouth openings from 68-point facial landmarks."""

    def __init__(self):
        self.mStart, self.mEnd = (48, 68)   # landmark rows used as the mouth
        self.mouth_counter = 0              # closed -> open transitions seen
        self.MAR_THRESHOLD = 0.5            # mouth counts as open above this
        self.mouth_open = False             # the mouth starts out closed

    def mouth_aspect_ratio(self, mouth):
        """Return the mouth aspect ratio (MAR) of the mouth landmark slice.

        The ratio is the sum of two vertical distances divided by twice the
        horizontal distance.
        """
        A = np.linalg.norm(mouth[2] - mouth[9])
        B = np.linalg.norm(mouth[4] - mouth[7])
        C = np.linalg.norm(mouth[0] - mouth[6])
        return (A + B) / (2.0 * C)

    def detect(self, landmarks):
        """Update the mouth state from one frame and return the open count.

        The counter increases only on a closed -> open transition, so a mouth
        held open across several frames is counted once.
        """
        mouth = landmarks[self.mStart:self.mEnd]
        mar = self.mouth_aspect_ratio(mouth)

        if mar > self.MAR_THRESHOLD:
            if not self.mouth_open:
                self.mouth_counter += 1
                self.mouth_open = True
        else:
            self.mouth_open = False

        return self.mouth_counter


# ---- Article section heading: 头部姿势检测 (head pose detection) ----
头部姿势检测用于监测用户头部的旋转角度。在HeadPoseDetection类中，我们计算了头部的旋转角度，并根据阈值判断头部的方向（左、右、中）。
class HeadPoseDetection:
    """Classify head tilt from the angle of the line between the eye centres."""

    def __init__(self):
        self.left_counter = 0       # frames classified as "left"
        self.right_counter = 0      # frames classified as "right"
        self.nod_threshold = 10     # degrees; above this the status is "left"
        self.low_threshold = -10    # degrees; below this the status is "right"
        self.head_status = "neutral"

    def calculate_head_pose(self, shape):
        """Return the angle, in degrees, of the line joining the eye centres.

        Args:
            shape: Array of 68 (x, y) landmarks; rows 36-41 are used as the
                left eye and rows 42-47 as the right eye.
        """
        left_eye_center = np.mean(shape[36:42], axis=0)
        right_eye_center = np.mean(shape[42:48], axis=0)
        dX = right_eye_center[0] - left_eye_center[0]
        dY = right_eye_center[1] - left_eye_center[1]
        return np.degrees(np.arctan2(dY, dX))

    def detect(self, shape):
        """Classify one frame.

        Returns:
            ``(status, counter)``: ``("left", left_counter)``,
            ``("right", right_counter)`` or ``("neutral", 0)``. The matching
            counter grows by one for every tilted frame.
        """
        angle = self.calculate_head_pose(shape)

        if angle > self.nod_threshold:
            self.head_status = "left"
            self.left_counter += 1
            return self.head_status, self.left_counter
        elif angle < self.low_threshold:
            self.head_status = "right"
            self.right_counter += 1
            return self.head_status, self.right_counter
        else:
            self.head_status = "neutral"
            return self.head_status, 0


# ---- Article section heading: 完整代码 (complete code) ----
在FaceDetection类中，我们将上述功能整合在一起，并使用摄像头或视频文件来进行面部检测和姿势识别。用户可以使用不同的动作来触发应用程序进入 “flag” 状态，例如眨眼、张嘴或头部旋转。一旦触发，应用程序将采用Retinaface来检测面部特征，并在窗口中显示视频帧。 在这段代码中，首先，我们随机选择动作的检测顺序，动作包括眨眼、张嘴和头部姿势检测。每个动作检测都有其独立的计数器，例如眨眼计数器、张嘴计数器和头部计数器。只有在满足特定条件时，相关动作的计数器才会递增。一旦三个动作的计数器均达到阈值，应用程序的标志被设置为1，表示活体检测成功。接下来，我们使用Retinaface库检测面部特征，计算FPS，并在图像中显示检测结果。最后，当应用程序标志被设置为1时，我们可以执行人脸识别或其他相关操作，以确保在进行人脸识别之前已完成活体检测。这种随机动作顺序实现了更加严格的活体检测，提高了安全性和准确性。

NAME : try_7
USER : admin
DATE : 9/10/2023
PROJECT_NAME : new_live_face
# CSDN : friklogff
import random
import time

import cv2
import dlib
import numpy as np
from imutils import face_utils

from retinaface_new import Retinaface


class BlinkDetection:
    """Count blinks from 68-point facial landmarks using the eye aspect ratio."""

    def __init__(self):
        self.ear = None
        self.status = None
        self.frame_counter = 0      # consecutive frames with closed eyes
        self.blink_counter = 0      # completed blinks
        self.EAR_THRESHOLD = 0.2    # eyes count as closed below this EAR
        self.CONSEC_FRAMES = 2      # closed frames needed to count a blink

    def eye_aspect_ratio(self, eye):
        """Return the EAR of one eye given its six landmark points."""
        A = np.linalg.norm(eye[1] - eye[5])
        B = np.linalg.norm(eye[2] - eye[4])
        C = np.linalg.norm(eye[0] - eye[3])
        return (A + B) / (2.0 * C)

    def detect(self, landmarks):
        """Update the blink state; return ``(blink_counter, status, ear)``."""
        left_eye = landmarks[36:42]
        right_eye = landmarks[42:48]

        ear_left = self.eye_aspect_ratio(left_eye)
        ear_right = self.eye_aspect_ratio(right_eye)
        self.ear = (ear_left + ear_right) / 2.0

        # The configured threshold is used here; the original hard-coded 0.21
        # and left EAR_THRESHOLD unused.
        if self.ear < self.EAR_THRESHOLD:
            self.frame_counter += 1
            self.status = "Blinking"
        else:
            # Count a blink once the eyes reopen after enough closed frames.
            if self.frame_counter >= self.CONSEC_FRAMES:
                self.blink_counter += 1
            self.frame_counter = 0
            self.status = "Open"

        return self.blink_counter, self.status, self.ear


class MouthDetection:
    """Count mouth openings from 68-point facial landmarks."""

    def __init__(self):
        self.mStart, self.mEnd = (48, 68)   # landmark rows used as the mouth
        self.mouth_counter = 0
        self.MAR_THRESHOLD = 0.5
        self.mouth_open = False             # the mouth starts out closed

    def mouth_aspect_ratio(self, mouth):
        """Return the mouth aspect ratio (MAR) of the mouth landmark slice."""
        A = np.linalg.norm(mouth[2] - mouth[9])
        B = np.linalg.norm(mouth[4] - mouth[7])
        C = np.linalg.norm(mouth[0] - mouth[6])
        return (A + B) / (2.0 * C)

    def detect(self, landmarks):
        """Update the mouth state and return the number of openings so far."""
        mouth = landmarks[self.mStart:self.mEnd]
        mar = self.mouth_aspect_ratio(mouth)

        if mar > self.MAR_THRESHOLD:
            if not self.mouth_open:         # closed -> open transition
                self.mouth_counter += 1
                self.mouth_open = True
        else:
            self.mouth_open = False

        return self.mouth_counter


class HeadPoseDetection:
    """Classify head tilt from the angle of the line between the eye centres."""

    def __init__(self):
        self.left_counter = 0
        self.right_counter = 0
        self.nod_threshold = 10     # degrees; above this the status is "left"
        self.low_threshold = -10    # degrees; below this the status is "right"
        self.head_status = "neutral"

    def calculate_head_pose(self, shape):
        """Return the angle, in degrees, of the line joining the eye centres."""
        left_eye_center = np.mean(shape[36:42], axis=0)
        right_eye_center = np.mean(shape[42:48], axis=0)
        dX = right_eye_center[0] - left_eye_center[0]
        dY = right_eye_center[1] - left_eye_center[1]
        return np.degrees(np.arctan2(dY, dX))

    def detect(self, shape):
        """Classify one frame; return ``(status, counter)``."""
        angle = self.calculate_head_pose(shape)

        if angle > self.nod_threshold:
            self.head_status = "left"
            self.left_counter += 1
            return self.head_status, self.left_counter
        elif angle < self.low_threshold:
            self.head_status = "right"
            self.right_counter += 1
            return self.head_status, self.right_counter
        else:
            self.head_status = "neutral"
            return self.head_status, 0


class FaceDetection:
    """Action-based liveness check followed by RetinaFace recognition.

    One of three actions (blink, open mouth, tilt head) is chosen at random
    and re-drawn each time the current one is completed. Once all three have
    been completed ``flag`` becomes 1 and later frames are passed to
    ``Retinaface.live_detect_image`` for recognition.
    """

    def __init__(self, video_path, video_save_path="", video_fps=25.0, use_camera=False):
        """Open the video source and load the detectors.

        Args:
            video_path: Path of the video file (ignored when ``use_camera``).
            video_save_path: Output video path; ``""`` disables saving.
            video_fps: Frame rate of the saved video.
            use_camera: Read from camera 0 instead of ``video_path``.

        Raises:
            ValueError: If the first frame cannot be read.
        """
        self.name = None
        self.mouth_flag = False
        self.head_flag = False
        self.blink_flag = False
        # 1 = blink, 2 = mouth, 3 = head pose.
        self.random_flag = random.randint(1, 3)
        if use_camera:
            self.capture = cv2.VideoCapture(0)
        else:
            self.capture = cv2.VideoCapture(video_path)
        self.video_save_path = video_save_path
        if video_save_path != "":
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            size = (int(self.capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                    int(self.capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            self.out = cv2.VideoWriter(video_save_path, fourcc, video_fps, size)
        self.ref, frame = self.capture.read()
        if not self.ref:
            raise ValueError("未能正确读取摄像头（视频），请注意是否正确安装摄像头（是否正确填写视频路径）。")
        self.fps = 0.0
        self.flag = 0
        self.detector = dlib.get_frontal_face_detector()
        self.predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
        self.blink_detector = BlinkDetection()
        self.mouth_detector = MouthDetection()
        self.head_pose_detector = HeadPoseDetection()
        self.nod_threshold = 10
        self.low_threshold = -10
        self.head_status = "neutral"
        self.blink_counter = 0
        self.mouth_counter = 0
        self.head_counter = 0
        self.ear = None
        self.status = None
        self.retinaface = Retinaface()

    def detect_blink(self, frame, landmarks):
        """Run blink detection, draw its state on ``frame``, return the count."""
        self.blink_counter, self.status, self.ear = self.blink_detector.detect(landmarks)
        cv2.putText(frame, "Blinks: {}".format(self.blink_counter), (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(frame, "EAR: {:.2f}".format(self.ear), (300, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(frame, "Eyes Status: {}".format(self.status), (10, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
        return self.blink_counter

    def detect_mouth(self, frame, landmarks):
        """Run mouth detection, draw its count on ``frame``, return the count."""
        self.mouth_counter = self.mouth_detector.detect(landmarks)
        cv2.putText(frame, "Mouth Count: {}".format(self.mouth_counter), (10, 90),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        return self.mouth_counter

    def detect_head_pose(self, frame, gray, face_rectangle):
        """Run head-pose detection on ``gray``, draw its state, return the count."""
        shape = self.predictor(gray, face_rectangle)
        shape = face_utils.shape_to_np(shape)
        self.head_status, self.head_counter = self.head_pose_detector.detect(shape)
        cv2.putText(frame, "Head Status: {}".format(self.head_status), (10, 120),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(frame, "Head Count: {}".format(self.head_counter), (10, 150),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        return self.head_counter

    def process_frame(self):
        """Read and process one frame.

        Returns:
            ``(ref, frame)``. At the end of the stream this is
            ``(False, None)`` -- always a 2-tuple, so callers that unpack the
            result (as the script below does) no longer fail on a bare
            ``None``.
        """
        t1 = time.time()
        self.ref, self.frame = self.capture.read()
        if not self.ref:
            return False, None

        if self.flag == 1:
            # Liveness passed: hand the frame (as RGB) to RetinaFace.
            self.frame = cv2.cvtColor(self.frame, cv2.COLOR_BGR2RGB)
            old_image, self.name = self.retinaface.live_detect_image(self.frame, self.flag)
            self.frame = np.array(old_image)
            self.frame = cv2.cvtColor(self.frame, cv2.COLOR_RGB2BGR)
            self.fps = (self.fps + (1. / (time.time() - t1))) / 2
            self.frame = cv2.putText(self.frame, "fps= %.2f" % self.fps, (200, 60),
                                     cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        else:
            # dlib detection is only needed while liveness is still pending.
            gray = cv2.cvtColor(self.frame, cv2.COLOR_BGR2GRAY)
            faces = self.detector(gray, 0)
            if len(faces) != 0:
                face_rectangle = faces[self._largest_face(faces)]
                # Plain ndarray instead of np.matrix; the detectors only
                # slice rows and take norms, which behave the same.
                landmarks = np.array(
                    [[p.x, p.y] for p in self.predictor(self.frame, face_rectangle).parts()])

                # NOTE(review): the comparison operators below were lost in
                # the garbled source; ">= 3" / "> 0" follow the prose
                # ("counters reach the threshold") -- confirm.
                if self.random_flag == 1:
                    self.detect_blink(self.frame, landmarks)
                    if self.blink_counter >= 3:
                        self.blink_flag = True
                        self.random_flag = random.randint(1, 3)

                if self.random_flag == 2:
                    self.detect_mouth(self.frame, landmarks)
                    if self.mouth_counter >= 3:
                        self.mouth_flag = True
                        self.random_flag = random.randint(1, 3)

                if self.random_flag == 3:
                    self.detect_head_pose(self.frame, gray, face_rectangle)
                    if self.head_counter > 0:
                        self.head_flag = True
                        self.random_flag = random.randint(1, 3)

                if self.blink_flag and self.mouth_flag and self.head_flag:
                    self.flag = 1

        if self.video_save_path != "":
            self.out.write(self.frame)

        return self.ref, self.frame

    def _largest_face(self, dets):
        """Return the index of the detection with the largest area."""
        if len(dets) == 1:
            return 0
        face_areas = [(det.right() - det.left()) * (det.bottom() - det.top()) for det in dets]
        largest_index = 0
        largest_area = face_areas[0]
        for index in range(1, len(dets)):
            if face_areas[index] > largest_area:
                largest_index = index
                largest_area = face_areas[index]
        print("largest_face index is {} in {} faces".format(largest_index, len(dets)))
        return largest_index

    def release(self):
        """Release the capture device and the video writer, if any."""
        print("Video Detection Done!")
        self.capture.release()
        if self.video_save_path != "":
            print("Save processed video to the path :" + self.video_save_path)
            self.out.release()

    def get_blink_counter(self):
        return self.blink_counter

    def get_mouth_counter(self):
        return self.mouth_counter

    def get_head_counter(self):
        return self.head_counter

    def get_flag(self):
        return self.flag

    def get_name(self):
        return self.name


if __name__ == "__main__":
    # Pass a video file path, or use FaceDetection("", use_camera=True) for
    # the camera.
    detector = FaceDetection("R.mp4")
    try:
        while True:
            flag = detector.get_flag()
            ref, frame = detector.process_frame()
            if frame is None:
                break
            cv2.imshow("Frame", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
            if flag == 1:
                # This frame went through recognition: save it and stop.
                print(flag)
                cv2.imwrite("last_frame.png", frame)
                break
    finally:
        # Always free the camera/writer and the windows, even on error.
        detector.release()
        cv2.destroyAllWindows()
结尾与未来展望
面部检测和姿势识别是计算机视觉领域的重要研究方向之一，它们在各种应用中具有广泛的应用前景。未来，我们可以期待更多的创新，以提高这些技术的准确性和实用性。
在本文中，我们了解了如何使用Python和各种库来实现面部检测和姿势识别。我们看到了眨眼、张嘴和头部旋转等动作如何触发应用程序的不同功能。这只是开始，未来的应用将更加智能和多功能。
未来的展望包括： 实时应用: 随着硬件性能的不断提高，实时面部检测和姿势识别将变得更加实用，用于虚拟现实、增强现实和交互式游戏。 情感分析: 面部检测可用于情感分析，识别用户的情绪和情感状态，从而改进用户体验。 生物识别: 面部识别技术正在被用于生物识别领域，例如人脸解锁和身份验证。 医疗应用: 面部检测和姿势识别可以用于医疗应用，例如帮助监测病人的眼睛、嘴巴和头部动作，以提前识别疾病症状。 人机交互: 进一步改进人机交互，包括手势控制和面部表情识别。
总的来说，面部检测和姿势识别技术将继续推动计算机视觉的发展，为各种应用提供更加智能和互动的功能。这个领域充满了机会，对于有兴趣深入研究的开发者和研究人员来说，有着无限的潜力。
本文中的示例应用程序仅仅是开始，你可以进一步扩展它，将这些技术应用到更多有趣的项目中。无论你是一个计算机视觉领域的专家，还是一个对新技术充满好奇心的初学者，这个领域都将为你提供无穷的探索和创新机会。希望本文能够激发你深入研究面部检测和姿势识别的兴趣，并在未来的项目中发挥作用。
下一步计划
本文活体检测算法安全性较差，接下来我会尝试学习活体模型训练算法，向大家分享我的学习历程。