Skip to content

Commit

Permalink
feat: implement FaceDetector (#974)
Browse files Browse the repository at this point in the history
* feat: implement FaceDetectorOptions

* feat: implement FaceDetector

* build: include FaceDetector

* build: include blaze_face_short_range.tflite
  • Loading branch information
homuler committed Jul 29, 2023
1 parent 3eb66c8 commit c777113
Show file tree
Hide file tree
Showing 14 changed files with 395 additions and 0 deletions.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
// Copyright (c) 2023 homuler
//
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT.

using System.Collections.Generic;

using FaceDetectionResult = Mediapipe.Tasks.Components.Containers.DetectionResult;

namespace Mediapipe.Tasks.Vision.FaceDetector
{
public sealed class FaceDetector : Core.BaseVisionTaskApi
{
  private const string _DETECTIONS_OUT_STREAM_NAME = "detections";
  private const string _DETECTIONS_TAG = "DETECTIONS";
  private const string _NORM_RECT_STREAM_NAME = "norm_rect_in";
  private const string _NORM_RECT_TAG = "NORM_RECT";
  private const string _IMAGE_IN_STREAM_NAME = "image_in";
  private const string _IMAGE_OUT_STREAM_NAME = "image_out";
  private const string _IMAGE_TAG = "IMAGE";
  private const string _TASK_GRAPH_NAME = "mediapipe.tasks.vision.face_detector.FaceDetectorGraph";

  // Declared as long so that `timestampMs * _MICRO_SECONDS_PER_MILLISECOND`
  // is computed in 64-bit arithmetic. With an int constant the product
  // overflows once timestampMs exceeds int.MaxValue / 1000 (~35.7 minutes).
  private const long _MICRO_SECONDS_PER_MILLISECOND = 1000;

#pragma warning disable IDE0052 // Remove unread private members
  /// <remarks>
  /// keep reference to prevent GC from collecting the callback instance.
  /// </remarks>
  private readonly Tasks.Core.TaskRunner.PacketsCallback _packetCallback;
#pragma warning restore IDE0052

  private FaceDetector(
    CalculatorGraphConfig graphConfig,
    Core.RunningMode runningMode,
    Tasks.Core.TaskRunner.PacketsCallback packetCallback) : base(graphConfig, runningMode, packetCallback)
  {
    _packetCallback = packetCallback;
  }

  /// <summary>
  /// Creates a <see cref="FaceDetector" /> object from a TensorFlow Lite model and the default <see cref="FaceDetectorOptions" />.
  ///
  /// Note that the created <see cref="FaceDetector" /> instance is in image mode,
  /// for detecting faces on single image inputs.
  /// </summary>
  /// <param name="modelPath">Path to the model.</param>
  /// <returns>A <see cref="FaceDetector" /> object in image running mode.</returns>
  public static FaceDetector CreateFromModelPath(string modelPath)
  {
    var baseOptions = new Tasks.Core.BaseOptions(modelAssetPath: modelPath);
    var options = new FaceDetectorOptions(baseOptions, runningMode: Core.RunningMode.IMAGE);
    return CreateFromOptions(options);
  }

  /// <summary>
  /// Creates the <see cref="FaceDetector" /> object from <see cref="FaceDetectorOptions" />.
  /// </summary>
  /// <param name="options">Options for the face detector task.</param>
  /// <returns>A <see cref="FaceDetector" /> object configured with <paramref name="options" />.</returns>
  public static FaceDetector CreateFromOptions(FaceDetectorOptions options)
  {
    var taskInfo = new Tasks.Core.TaskInfo<FaceDetectorOptions>(
      taskGraph: _TASK_GRAPH_NAME,
      inputStreams: new List<string> {
        string.Join(":", _IMAGE_TAG, _IMAGE_IN_STREAM_NAME),
        string.Join(":", _NORM_RECT_TAG, _NORM_RECT_STREAM_NAME),
      },
      outputStreams: new List<string> {
        string.Join(":", _DETECTIONS_TAG, _DETECTIONS_OUT_STREAM_NAME),
        string.Join(":", _IMAGE_TAG, _IMAGE_OUT_STREAM_NAME),
      },
      taskOptions: options);

    return new FaceDetector(
      taskInfo.GenerateGraphConfig(options.runningMode == Core.RunningMode.LIVE_STREAM),
      options.runningMode,
      BuildPacketsCallback(options.resultCallback));
  }

  /// <summary>
  /// Performs face detection on the provided MediaPipe Image.
  ///
  /// Only use this method when the <see cref="FaceDetector" /> is created with the image running mode.
  /// </summary>
  /// <returns>
  /// A face detection result object that contains a list of face detections,
  /// each detection has a bounding box that is expressed in the unrotated input
  /// frame of reference coordinates system, i.e. in `[0,image_width) x [0,
  /// image_height)`, which are the dimensions of the underlying image data.
  /// </returns>
  public FaceDetectionResult Detect(Image image, Core.ImageProcessingOptions? imageProcessingOptions = null)
  {
    var normalizedRect = ConvertToNormalizedRect(imageProcessingOptions, image, roiAllowed: false);

    var packetMap = new PacketMap();
    packetMap.Emplace(_IMAGE_IN_STREAM_NAME, new ImagePacket(image));
    packetMap.Emplace(_NORM_RECT_STREAM_NAME, new NormalizedRectPacket(normalizedRect));
    var outputPackets = ProcessImageData(packetMap);

    return GetFaceDetectionResult(outputPackets);
  }

  /// <summary>
  /// Performs face detection on the provided video frames.
  ///
  /// Only use this method when the FaceDetector is created with the video
  /// running mode. It's required to provide the video frame's timestamp (in
  /// milliseconds) along with the video frame. The input timestamps should be
  /// monotonically increasing for adjacent calls of this method.
  /// </summary>
  /// <returns>
  /// A face detection result object that contains a list of face detections,
  /// each detection has a bounding box that is expressed in the unrotated input
  /// frame of reference coordinates system, i.e. in `[0,image_width) x [0,
  /// image_height)`, which are the dimensions of the underlying image data.
  /// </returns>
  public FaceDetectionResult DetectForVideo(Image image, int timestampMs, Core.ImageProcessingOptions? imageProcessingOptions = null)
  {
    var normalizedRect = ConvertToNormalizedRect(imageProcessingOptions, image, roiAllowed: false);

    var packetMap = new PacketMap();
    // 64-bit multiplication: see _MICRO_SECONDS_PER_MILLISECOND.
    var timestamp = new Timestamp(timestampMs * _MICRO_SECONDS_PER_MILLISECOND);
    packetMap.Emplace(_IMAGE_IN_STREAM_NAME, new ImagePacket(image, timestamp));
    packetMap.Emplace(_NORM_RECT_STREAM_NAME, new NormalizedRectPacket(normalizedRect).At(timestamp));
    var outputPackets = ProcessVideoData(packetMap);

    return GetFaceDetectionResult(outputPackets);
  }

  /// <summary>
  /// Sends live image data (an Image with a unique timestamp) to perform face detection.
  ///
  /// Only use this method when the FaceDetector is created with the live stream
  /// running mode. The input timestamps should be monotonically increasing for
  /// adjacent calls of this method. This method will return immediately after the
  /// input image is accepted. The results will be available via the
  /// <see cref="FaceDetectorOptions.ResultCallback" /> provided in the <see cref="FaceDetectorOptions" />.
  /// The <see cref="DetectAsync" /> method is designed to process live stream data such as camera
  /// input. To lower the overall latency, face detector may drop the input
  /// images if needed. In other words, it's not guaranteed to have output per
  /// input image.
  /// </summary>
  public void DetectAsync(Image image, int timestampMs, Core.ImageProcessingOptions? imageProcessingOptions = null)
  {
    var normalizedRect = ConvertToNormalizedRect(imageProcessingOptions, image, roiAllowed: false);

    var packetMap = new PacketMap();
    // 64-bit multiplication: see _MICRO_SECONDS_PER_MILLISECOND.
    var timestamp = new Timestamp(timestampMs * _MICRO_SECONDS_PER_MILLISECOND);
    packetMap.Emplace(_IMAGE_IN_STREAM_NAME, new ImagePacket(image, timestamp));
    packetMap.Emplace(_NORM_RECT_STREAM_NAME, new NormalizedRectPacket(normalizedRect).At(timestamp));

    SendLiveStreamData(packetMap);
  }

  /// <summary>
  /// Extracts the detections from <paramref name="outputPackets" />, returning an
  /// empty result when the detections packet is empty (no faces found).
  /// Shared by <see cref="Detect" /> and <see cref="DetectForVideo" />.
  /// </summary>
  private static FaceDetectionResult GetFaceDetectionResult(PacketMap outputPackets)
  {
    var outDetectionsPacket = outputPackets.At<DetectionVectorPacket, List<Detection>>(_DETECTIONS_OUT_STREAM_NAME);
    if (outDetectionsPacket.IsEmpty())
    {
      return new FaceDetectionResult(new List<Components.Containers.Detection>());
    }
    return FaceDetectionResult.CreateFrom(outDetectionsPacket.Get());
  }

  /// <summary>
  /// Wraps <paramref name="resultCallback" /> into a <see cref="Tasks.Core.TaskRunner.PacketsCallback" />
  /// that unpacks the output image and detections and forwards them to the user callback.
  /// Returns null when <paramref name="resultCallback" /> is null (non-live-stream modes).
  /// </summary>
  private static Tasks.Core.TaskRunner.PacketsCallback BuildPacketsCallback(FaceDetectorOptions.ResultCallback resultCallback)
  {
    if (resultCallback == null)
    {
      return null;
    }

    return (PacketMap outputPackets) =>
    {
      var outImagePacket = outputPackets.At<ImagePacket, Image>(_IMAGE_OUT_STREAM_NAME);
      var outDetectionsPacket = outputPackets.At<DetectionVectorPacket, List<Detection>>(_DETECTIONS_OUT_STREAM_NAME);
      // Both streams must be present; a missing or empty image packet means
      // there is nothing to report for this iteration.
      if (outImagePacket == null || outDetectionsPacket == null)
      {
        return;
      }
      if (outImagePacket.IsEmpty())
      {
        return;
      }
      var image = outImagePacket.Get();
      // Convert the graph timestamp (microseconds) back to milliseconds for the user callback.
      var timestamp = outImagePacket.Timestamp().Microseconds() / _MICRO_SECONDS_PER_MILLISECOND;
      if (outDetectionsPacket.IsEmpty())
      {
        // No faces detected: still invoke the callback so callers can observe every frame.
        resultCallback(
          new FaceDetectionResult(new List<Components.Containers.Detection>()),
          image,
          (int)timestamp);
        return;
      }
      var detectionProtoList = outDetectionsPacket.Get();
      resultCallback(FaceDetectionResult.CreateFrom(detectionProtoList), image, (int)timestamp);
    };
  }
}
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Copyright (c) 2023 homuler
//
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT.

namespace Mediapipe.Tasks.Vision.FaceDetector
{
/// <summary>
/// Options for the face detector task.
/// </summary>
public sealed class FaceDetectorOptions : Tasks.Core.ITaskOptions
{
  /// <param name="detectionResult">
  /// face detection result object that contains a list of face detections,
  /// each detection has a bounding box that is expressed in the unrotated
  /// input frame of reference coordinates system,
  /// i.e. in `[0,image_width) x [0,image_height)`, which are the dimensions
  /// of the underlying image data.
  /// </param>
  /// <param name="image">
  /// The input image that the face detector runs on.
  /// </param>
  /// <param name="timestampMs">
  /// The input timestamp in milliseconds.
  /// </param>
  public delegate void ResultCallback(Components.Containers.DetectionResult detectionResult, Image image, int timestampMs);

  /// <summary>
  /// Base options for the face detector task.
  /// </summary>
  public Tasks.Core.BaseOptions baseOptions { get; }
  /// <summary>
  /// The running mode of the task. Default to the image mode.
  /// </summary>
  public Core.RunningMode runningMode { get; }
  /// <summary>
  /// The minimum confidence score for the face detection to be considered successful.
  /// </summary>
  // NOTE: the defaults live on the constructor parameters; initializers here
  // would be dead code because the constructor always assigns these properties.
  public float minDetectionConfidence { get; }
  /// <summary>
  /// The minimum non-maximum-suppression threshold for face detection to be considered overlapped.
  /// </summary>
  public float minSuppressionThreshold { get; }
  /// <summary>
  /// The user-defined result callback for processing live stream data.
  /// The result callback should only be specified when the running mode is set to the live stream mode.
  /// </summary>
  public ResultCallback resultCallback { get; }

  public FaceDetectorOptions(
    Tasks.Core.BaseOptions baseOptions,
    Core.RunningMode runningMode = Core.RunningMode.IMAGE,
    float minDetectionConfidence = 0.5f,
    float minSuppressionThreshold = 0.3f,
    ResultCallback resultCallback = null)
  {
    this.baseOptions = baseOptions;
    this.runningMode = runningMode;
    this.minDetectionConfidence = minDetectionConfidence;
    this.minSuppressionThreshold = minSuppressionThreshold;
    this.resultCallback = resultCallback;
  }

  /// <summary>
  /// Converts these options to the corresponding <see cref="Proto.FaceDetectorGraphOptions" />.
  /// Stream mode is enabled for any running mode other than image.
  /// </summary>
  internal Proto.FaceDetectorGraphOptions ToProto()
  {
    var baseOptionsProto = baseOptions.ToProto();
    baseOptionsProto.UseStreamMode = runningMode != Core.RunningMode.IMAGE;

    return new Proto.FaceDetectorGraphOptions
    {
      BaseOptions = baseOptionsProto,
      MinDetectionConfidence = minDetectionConfidence,
      MinSuppressionThreshold = minSuppressionThreshold,
    };
  }

  CalculatorOptions Tasks.Core.ITaskOptions.ToCalculatorOptions()
  {
    // Pack the graph options into a CalculatorOptions via the proto extension mechanism.
    var options = new CalculatorOptions();
    options.SetExtension(Proto.FaceDetectorGraphOptions.Extensions.Ext, ToProto());
    return options;
  }
}
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions mediapipe_api/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ load("@bazel_skylib//lib:selects.bzl", "selects")
load("@bazel_skylib//rules:common_settings.bzl", "bool_flag", "string_list_flag")
load("@rules_pkg//:pkg.bzl", "pkg_zip")
load("@build_bazel_apple_support//rules:universal_binary.bzl", "universal_binary")
load("@com_google_mediapipe//mediapipe/framework/tool:mediapipe_files.bzl", "mediapipe_files")

bool_flag(
name = "macos_universal",
Expand Down Expand Up @@ -308,6 +309,7 @@ cc_library(
name = "face_detection_calculators",
deps = [
"@com_google_mediapipe//mediapipe/framework/tool:switch_mux_calculator",
"@com_google_mediapipe//mediapipe/tasks/cc/vision/face_detector:face_detector",
] + select({
"@com_google_mediapipe//mediapipe/gpu:disable_gpu": [
"@com_google_mediapipe//mediapipe/graphs/face_detection:desktop_live_calculators",
Expand Down Expand Up @@ -426,6 +428,10 @@ pkg_zip(
],
)

mediapipe_files(srcs = [
"blaze_face_short_range.tflite",
])

pkg_asset(
name = "mediapipe_assets",
srcs = select({
Expand Down Expand Up @@ -465,6 +471,7 @@ pkg_asset(
filegroup(
name = "face_detection_assets",
srcs = [
":blaze_face_short_range.tflite",
"@com_google_mediapipe//mediapipe/modules/face_detection:face_detection_full_range_sparse.tflite",
"@com_google_mediapipe//mediapipe/modules/face_detection:face_detection_full_range.tflite",
"@com_google_mediapipe//mediapipe/modules/face_detection:face_detection_short_range.tflite",
Expand Down Expand Up @@ -557,6 +564,7 @@ pkg_zip(
"//mediapipe_api/modules/face_geometry/protos:proto_srcs",
"//mediapipe_api/modules/holistic_landmark/calculators:proto_srcs",
"//mediapipe_api/tasks/cc/core/proto:proto_srcs",
"//mediapipe_api/tasks/cc/vision/face_detector/proto:proto_srcs",
"//mediapipe_api/util:proto_srcs",
"//mediapipe_api/util/tracking:proto_srcs",
],
Expand Down
Loading

0 comments on commit c777113

Please sign in to comment.