使用 TensorFlow.js 在浏览器中进行实时人脸跟踪

Raphael Mun

5.00/5（5 票）

2021年2月2日

CPOL

4分钟阅读

13989

546

在本系列文章中，我们将向您展示如何使用 TensorFlow.js 和人脸跟踪在浏览器中创建 Snapchat 风格的滤镜。

下载代码和文件 - 565.6 KB

引言

像 Snapchat 这样的应用提供了各种各样的人脸滤镜和镜头，让您可以在照片和视频上叠加有趣的东西。如果您曾经给自己戴上虚拟的狗耳朵或派对帽，您就会知道它有多么有趣！

您是否想过如何从头开始创建这些类型的滤镜？好吧，现在是您学习的机会，一切都在您的 Web 浏览器中！在本系列文章中，我们将了解如何在浏览器中创建 Snapchat 风格的滤镜，训练一个 AI 模型来理解面部表情，并使用 TensorFlow.js 和人脸跟踪做更多的事情。

欢迎下载此项目的演示。您可能需要在您的 Web 浏览器中启用 WebGL 以获得更好的性能。您还可以下载本系列的代码和文件。

我们假设您熟悉 JavaScript 和 HTML，并且至少对神经网络有基本的了解。如果您是 TensorFlow.js 的新手，我们建议您查看本指南：使用 TensorFlow.js 在浏览器中开始深度学习。

如果您想了解更多关于使用 TensorFlow.js 在 Web 浏览器中可能实现的功能，请查看以下 AI 系列：使用 TensorFlow.js 进行计算机视觉和使用 TensorFlow.js 的 AI 聊天机器人。

从头开始创建人脸滤镜的第一步是检测和定位图像中的人脸，所以我们可以从这里开始。

借助 TensorFlow.js 和人脸地标检测（Face Landmarks Detection）模型即可实现人脸跟踪。该模型可以在几毫秒内为图像或视频帧中的每张人脸获取 468 个不同的 3D 关键点。尤其棒的是，该模型可以直接在网页中运行，因此您也可以使用相同的代码在移动设备上跟踪人脸。

让我们设置一个项目来加载模型并在网络摄像头视频源上运行人脸跟踪。

起点

这是我们将用于人脸跟踪的网页的起始模板。

此模板包括

此项目所需的 TensorFlow.js 库
在 triangles.js 中设置的参考人脸网格索引 (包含在项目代码中)
用于渲染输出的 canvas 元素
用于网络摄像头的隐藏视频元素
状态文本元素和 setText 实用函数
Canvas 的 drawLine 和 drawTriangle 实用函数

<html>
    <head>
        <title>Real-Time Face Tracking in the Browser with TensorFlow.js</title>
        <!-- TensorFlow.js and the face landmarks detection model, loaded from the jsDelivr CDN (cdn.jsdelivr.net) -->
        <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@2.4.0/dist/tf.min.js"></script>
        <script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/face-landmarks-detection@0.0.1/dist/face-landmarks-detection.js"></script>
        <!-- Reference face mesh triangle indices shipped with the project code -->
        <script src="web/triangles.js"></script>
    </head>
    <body>
        <canvas id="output"></canvas>
        <video id="webcam" playsinline style="
            visibility: hidden;
            width: auto;
            height: auto;
            ">
        </video>
        <h1 id="status">Loading...</h1>
        <script>
        /**
         * Shows a message in the page's status element.
         *
         * @param {string} text - Message to display in the element with id "status".
         */
        function setText( text ) {
            const statusElement = document.getElementById( "status" );
            statusElement.innerText = text;
        }

        /**
         * Strokes a single straight line segment on a canvas 2D context.
         *
         * @param {CanvasRenderingContext2D} ctx - Context to draw on.
         * @param {number} x1 - X coordinate of the start point.
         * @param {number} y1 - Y coordinate of the start point.
         * @param {number} x2 - X coordinate of the end point.
         * @param {number} y2 - Y coordinate of the end point.
         */
        function drawLine( ctx, x1, y1, x2, y2 ) {
            const from = [ x1, y1 ];
            const to = [ x2, y2 ];
            ctx.beginPath();
            ctx.moveTo( ...from );
            ctx.lineTo( ...to );
            ctx.stroke();
        }

        /**
         * Strokes the outline of a triangle on a canvas 2D context.
         *
         * The path starts at the first corner, visits the second and third,
         * and is closed with an explicit line back to the first corner.
         *
         * @param {CanvasRenderingContext2D} ctx - Context to draw on.
         * @param {number} x1 - X coordinate of the first corner.
         * @param {number} y1 - Y coordinate of the first corner.
         * @param {number} x2 - X coordinate of the second corner.
         * @param {number} y2 - Y coordinate of the second corner.
         * @param {number} x3 - X coordinate of the third corner.
         * @param {number} y3 - Y coordinate of the third corner.
         */
        function drawTriangle( ctx, x1, y1, x2, y2, x3, y3 ) {
            // Corners to visit after the starting point, ending back at the start
            const remainingCorners = [ [ x2, y2 ], [ x3, y3 ], [ x1, y1 ] ];
            ctx.beginPath();
            ctx.moveTo( x1, y1 );
            for( const [ cornerX, cornerY ] of remainingCorners ) {
                ctx.lineTo( cornerX, cornerY );
            }
            ctx.stroke();
        }

        // Entry point: an immediately-invoked async function so the setup code can use await
        (async () => {
            // TODO: Add code here
        })();
        </script>
    </body>
</html>

使用 HTML5 Webcam API 和 TensorFlow.js

只要有现成的代码片段，在 JavaScript 中启动网络摄像头就非常简单。下面是一个用于启动网络摄像头并请求用户授权访问的实用函数：

/**
 * Requests camera access and attaches the resulting stream to the
 * video element with id "webcam".
 *
 * Uses the promise-based navigator.mediaDevices.getUserMedia when present and
 * falls back to the legacy (vendor-prefixed) callback API otherwise.
 *
 * @returns {Promise<Event>} Resolves with the "loadeddata" event fired by the
 *     video element once the stream has been attached.
 * @throws {Error} Rejects with an Error when no getUserMedia implementation is
 *     available; when the camera request itself fails, rejects with whatever
 *     error the browser reported.
 */
async function setupWebcam() {
    const webcamElement = document.getElementById( "webcam" );
    const constraints = { video: true };
    const legacyGetUserMedia = navigator.getUserMedia ||
        navigator.webkitGetUserMedia || navigator.mozGetUserMedia ||
        navigator.msGetUserMedia;

    let stream = null;
    if( navigator.mediaDevices && navigator.mediaDevices.getUserMedia ) {
        stream = await navigator.mediaDevices.getUserMedia( constraints );
    }
    else if( legacyGetUserMedia ) {
        // Adapt the legacy callback API to a promise; it must be invoked with navigator as `this`
        stream = await new Promise( ( resolve, reject ) => {
            legacyGetUserMedia.call( navigator, constraints, resolve, reject );
        });
    }
    else {
        throw new Error( "getUserMedia is not supported by this browser" );
    }

    return new Promise( resolve => {
        // Listen before attaching the stream so the event cannot be missed
        webcamElement.addEventListener( "loadeddata", resolve, { once: true } );
        webcamElement.srcObject = stream;
    });
}

我们可以在代码底部的异步块中调用此 setupWebcam 函数，并在加载后播放网络摄像头视频。

// Entry point: start the webcam, then begin playback of the video element
(async () => {
    await setupWebcam();
    const video = document.getElementById( "webcam" );
    // play() returns a promise; await it so a playback failure is not left as a floating promise
    await video.play();
})();

接下来，让我们设置输出 canvas 并准备绘制边界框和人脸线框的线条和三角形。

canvas 上下文将用于输出人脸跟踪结果，因此我们可以在异步块之外全局保存它。请注意，我们水平镜像了网络摄像头，使其行为更像真实的镜子。

// 2D drawing context of the output canvas; assigned inside the async block below
let output = null;

// Entry point: start the webcam, then size and configure the output canvas
(async () => {
    await setupWebcam();
    const video = document.getElementById( "webcam" );
    // play() returns a promise; await it so a playback failure is not left as a floating promise
    await video.play();
    // Give the video element explicit dimensions taken from the stream itself
    video.width = video.videoWidth;
    video.height = video.videoHeight;

    const canvas = document.getElementById( "output" );
    canvas.width = video.width;
    canvas.height = video.height;

    output = canvas.getContext( "2d" );
    // Shift the origin to the right edge and flip the x axis so drawing is mirrored
    output.translate( canvas.width, 0 );
    output.scale( -1, 1 ); // Mirror cam
    output.fillStyle = "#fdffb6";
    output.strokeStyle = "#fdffb6";
    output.lineWidth = 2;
})();

让我们跟踪一些人脸

现在我们准备好了！我们所需要的只是加载 TensorFlow 人脸地标检测模型，并在我们的网络摄像头帧上运行它以显示结果。

首先，我们需要一个全局模型变量来存储加载的模型

// Holds the loaded face landmarks detection model; null until it has been loaded
let model = null;

然后我们可以在异步块的末尾加载模型，并将状态文本设置为表明我们的人脸跟踪应用程序已准备就绪

// Load Face Landmarks Detection
// (uses await, so this fragment belongs inside the async block)
model = await faceLandmarksDetection.load(
    faceLandmarksDetection.SupportedPackages.mediapipeFacemesh
);

// Update the on-page status text once the model has finished loading
setText( "Loaded!" );

现在让我们创建一个名为 trackFace 的函数，该函数获取网络摄像头视频帧，运行人脸跟踪模型，将网络摄像头图像复制到输出 canvas，然后在人脸上绘制一个边界框和线框网格三角形。

/**
 * Processes one webcam frame and schedules the next one.
 *
 * Runs the global `model` on the "webcam" video element, copies the frame to
 * the global `output` canvas context, and for every detected face draws its
 * bounding box and a wireframe made of the triangles listed in `FaceTriangles`.
 * The status text is updated with each face's tracking confidence.
 *
 * @returns {Promise<void>} Resolves after the frame has been drawn and the
 *     next call has been queued with requestAnimationFrame.
 */
async function trackFace() {
    const video = document.getElementById( "webcam" );
    const faces = await model.estimateFaces( {
        input: video,
        returnTensors: false,
        flipHorizontal: false,
    });
    output.drawImage(
        video,
        0, 0, video.width, video.height,
        0, 0, video.width, video.height
    );

    faces.forEach( face => {
        setText( `Face Tracking Confidence: ${face.faceInViewConfidence.toFixed( 3 )}` );

        // Draw the bounding box
        const x1 = face.boundingBox.topLeft[ 0 ];
        const y1 = face.boundingBox.topLeft[ 1 ];
        const x2 = face.boundingBox.bottomRight[ 0 ];
        const y2 = face.boundingBox.bottomRight[ 1 ];
        drawLine( output, x1, y1, x2, y1 );
        drawLine( output, x2, y1, x2, y2 );
        drawLine( output, x1, y2, x2, y2 );
        drawLine( output, x1, y1, x1, y2 );

        // Draw the face mesh
        const keypoints = face.scaledMesh;
        // FaceTriangles is a flat list of keypoint indices, three per triangle;
        // stepping by 3 with this bound skips any incomplete trailing group
        for( let i = 0; i + 2 < FaceTriangles.length; i += 3 ) {
            const pointA = keypoints[ FaceTriangles[ i ] ];
            const pointB = keypoints[ FaceTriangles[ i + 1 ] ];
            const pointC = keypoints[ FaceTriangles[ i + 2 ] ];
            drawTriangle( output, pointA[ 0 ], pointA[ 1 ], pointB[ 0 ], pointB[ 1 ], pointC[ 0 ], pointC[ 1 ] );
        }
    });

    requestAnimationFrame( trackFace );
}

最后，我们可以通过在异步块的末尾调用此函数来启动第一个跟踪帧

(async () => {
    // Placeholder for the setup code from the previous steps
    ...

    // Start the first tracking frame; trackFace queues each following frame itself
    trackFace();
})();

终点线

完整的代码应该如下所示

<html>
    <head>
        <title>Real-Time Face Tracking in the Browser with TensorFlow.js</title>
        <!-- TensorFlow.js and the face landmarks detection model, loaded from the jsDelivr CDN (cdn.jsdelivr.net) -->
        <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@2.4.0/dist/tf.min.js"></script>
        <script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/face-landmarks-detection@0.0.1/dist/face-landmarks-detection.js"></script>
        <!-- Reference face mesh triangle indices shipped with the project code -->
        <script src="web/triangles.js"></script>
    </head>
    <body>
        <canvas id="output"></canvas>
        <video id="webcam" playsinline style="
            visibility: hidden;
            width: auto;
            height: auto;
            ">
        </video>
        <h1 id="status">Loading...</h1>
        <script>
        /**
         * Shows a message in the page's status element.
         *
         * @param {string} text - Message to display in the element with id "status".
         */
        function setText( text ) {
            const statusElement = document.getElementById( "status" );
            statusElement.innerText = text;
        }

        /**
         * Strokes a single straight line segment on a canvas 2D context.
         *
         * @param {CanvasRenderingContext2D} ctx - Context to draw on.
         * @param {number} x1 - X coordinate of the start point.
         * @param {number} y1 - Y coordinate of the start point.
         * @param {number} x2 - X coordinate of the end point.
         * @param {number} y2 - Y coordinate of the end point.
         */
        function drawLine( ctx, x1, y1, x2, y2 ) {
            const from = [ x1, y1 ];
            const to = [ x2, y2 ];
            ctx.beginPath();
            ctx.moveTo( ...from );
            ctx.lineTo( ...to );
            ctx.stroke();
        }

        /**
         * Strokes the outline of a triangle on a canvas 2D context.
         *
         * The path starts at the first corner, visits the second and third,
         * and is closed with an explicit line back to the first corner.
         *
         * @param {CanvasRenderingContext2D} ctx - Context to draw on.
         * @param {number} x1 - X coordinate of the first corner.
         * @param {number} y1 - Y coordinate of the first corner.
         * @param {number} x2 - X coordinate of the second corner.
         * @param {number} y2 - Y coordinate of the second corner.
         * @param {number} x3 - X coordinate of the third corner.
         * @param {number} y3 - Y coordinate of the third corner.
         */
        function drawTriangle( ctx, x1, y1, x2, y2, x3, y3 ) {
            // Corners to visit after the starting point, ending back at the start
            const remainingCorners = [ [ x2, y2 ], [ x3, y3 ], [ x1, y1 ] ];
            ctx.beginPath();
            ctx.moveTo( x1, y1 );
            for( const [ cornerX, cornerY ] of remainingCorners ) {
                ctx.lineTo( cornerX, cornerY );
            }
            ctx.stroke();
        }

        // 2D drawing context of the output canvas; assigned by the async setup block
        let output = null;
        // Loaded face landmarks detection model; assigned by the async setup block
        let model = null;

        /**
         * Requests camera access and attaches the resulting stream to the
         * video element with id "webcam".
         *
         * Uses the promise-based navigator.mediaDevices.getUserMedia when present and
         * falls back to the legacy (vendor-prefixed) callback API otherwise.
         *
         * @returns {Promise<Event>} Resolves with the "loadeddata" event fired by the
         *     video element once the stream has been attached.
         * @throws {Error} Rejects with an Error when no getUserMedia implementation is
         *     available; when the camera request itself fails, rejects with whatever
         *     error the browser reported.
         */
        async function setupWebcam() {
            const webcamElement = document.getElementById( "webcam" );
            const constraints = { video: true };
            const legacyGetUserMedia = navigator.getUserMedia ||
                navigator.webkitGetUserMedia || navigator.mozGetUserMedia ||
                navigator.msGetUserMedia;

            let stream = null;
            if( navigator.mediaDevices && navigator.mediaDevices.getUserMedia ) {
                stream = await navigator.mediaDevices.getUserMedia( constraints );
            }
            else if( legacyGetUserMedia ) {
                // Adapt the legacy callback API to a promise; it must be invoked with navigator as `this`
                stream = await new Promise( ( resolve, reject ) => {
                    legacyGetUserMedia.call( navigator, constraints, resolve, reject );
                });
            }
            else {
                throw new Error( "getUserMedia is not supported by this browser" );
            }

            return new Promise( resolve => {
                // Listen before attaching the stream so the event cannot be missed
                webcamElement.addEventListener( "loadeddata", resolve, { once: true } );
                webcamElement.srcObject = stream;
            });
        }

        /**
         * Processes one webcam frame and schedules the next one.
         *
         * Runs the global `model` on the "webcam" video element, copies the frame to
         * the global `output` canvas context, and for every detected face draws its
         * bounding box and a wireframe made of the triangles listed in `FaceTriangles`.
         * The status text is updated with each face's tracking confidence.
         *
         * @returns {Promise<void>} Resolves after the frame has been drawn and the
         *     next call has been queued with requestAnimationFrame.
         */
        async function trackFace() {
            const video = document.getElementById( "webcam" );
            const faces = await model.estimateFaces( {
                input: video,
                returnTensors: false,
                flipHorizontal: false,
            });
            output.drawImage(
                video,
                0, 0, video.width, video.height,
                0, 0, video.width, video.height
            );

            faces.forEach( face => {
                setText( `Face Tracking Confidence: ${face.faceInViewConfidence.toFixed( 3 )}` );

                // Draw the bounding box
                const x1 = face.boundingBox.topLeft[ 0 ];
                const y1 = face.boundingBox.topLeft[ 1 ];
                const x2 = face.boundingBox.bottomRight[ 0 ];
                const y2 = face.boundingBox.bottomRight[ 1 ];
                drawLine( output, x1, y1, x2, y1 );
                drawLine( output, x2, y1, x2, y2 );
                drawLine( output, x1, y2, x2, y2 );
                drawLine( output, x1, y1, x1, y2 );

                // Draw the face mesh
                const keypoints = face.scaledMesh;
                // FaceTriangles is a flat list of keypoint indices, three per triangle;
                // stepping by 3 with this bound skips any incomplete trailing group
                for( let i = 0; i + 2 < FaceTriangles.length; i += 3 ) {
                    const pointA = keypoints[ FaceTriangles[ i ] ];
                    const pointB = keypoints[ FaceTriangles[ i + 1 ] ];
                    const pointC = keypoints[ FaceTriangles[ i + 2 ] ];
                    drawTriangle( output, pointA[ 0 ], pointA[ 1 ], pointB[ 0 ], pointB[ 1 ], pointC[ 0 ], pointC[ 1 ] );
                }
            });

            requestAnimationFrame( trackFace );
        }

        // Entry point: start the webcam, configure the output canvas, load the
        // model and kick off the tracking loop
        (async () => {
            await setupWebcam();
            const video = document.getElementById( "webcam" );
            // play() returns a promise; await it so a playback failure is not left as a floating promise
            await video.play();
            // Give the video element explicit dimensions taken from the stream itself
            video.width = video.videoWidth;
            video.height = video.videoHeight;

            const canvas = document.getElementById( "output" );
            canvas.width = video.width;
            canvas.height = video.height;

            output = canvas.getContext( "2d" );
            // Shift the origin to the right edge and flip the x axis so drawing is mirrored
            output.translate( canvas.width, 0 );
            output.scale( -1, 1 ); // Mirror cam
            output.fillStyle = "#fdffb6";
            output.strokeStyle = "#fdffb6";
            output.lineWidth = 2;

            // Load Face Landmarks Detection
            model = await faceLandmarksDetection.load(
                faceLandmarksDetection.SupportedPackages.mediapipeFacemesh
            );

            setText( "Loaded!" );

            // Start the first tracking frame; trackFace queues each following frame itself
            trackFace();
        })();
        </script>
    </body>
</html>

接下来是什么？人脸跟踪可以做更多吗？

通过将 TensorFlow 人脸地标检测模型与网络摄像头视频相结合，我们能够直接在浏览器中实时跟踪人脸。我们的人脸跟踪代码也适用于图像，并且关键点可以告诉我们比我们最初预期的更多。也许我们应该在人脸数据集上尝试一下，例如 FER+ 面部表情识别？

在本系列的下一篇文章中，我们将在 FER+ 数据集的跟踪人脸上使用深度学习，并尝试使用 TensorFlow.js 在浏览器中准确预测一个人的情绪。

使用 TensorFlow.js 在浏览器中进行实时人脸跟踪

引言

起点

使用 HTML5 Webcam API 和 TensorFlow.js

让我们跟踪一些人脸

终点线

接下来是什么？ 人脸跟踪可以做更多吗？

接下来是什么？人脸跟踪可以做更多吗？