From bcce0e9d1e1c25c53f58972ceb633961cf9716ec Mon Sep 17 00:00:00 2001 From: Junrou Nishida Date: Mon, 1 Jan 2024 13:07:46 +0900 Subject: [PATCH] feat: build Image/ImageFrame from Texture2D (#1083) * refactor: add ImageFormatExtension * feat: build Image/ImageFrame from Texture2D * refactor: format.NumberOfChannels * refactor: implement ByteDepth as a extension method * refactor: implement ChannelSize as a extension method * refactor: use new constructors in TextureFrame --- .../Scripts/Extension/ImageFormatExtension.cs | 121 ++++++++++++++++++ .../Extension/ImageFormatExtension.cs.meta | 11 ++ .../Scripts/Framework/Formats/Image.cs | 6 + .../Scripts/Framework/Formats/ImageFrame.cs | 118 ++--------------- .../Unity/Experimental/TextureFrame.cs | 4 +- .../Unity/Extension/ImageFrameExtension.cs | 18 +-- .../Tests/EditMode/Framework/PacketTest.cs | 4 +- 7 files changed, 160 insertions(+), 122 deletions(-) create mode 100644 Packages/com.github.homuler.mediapipe/Runtime/Scripts/Extension/ImageFormatExtension.cs create mode 100644 Packages/com.github.homuler.mediapipe/Runtime/Scripts/Extension/ImageFormatExtension.cs.meta diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Extension/ImageFormatExtension.cs b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Extension/ImageFormatExtension.cs new file mode 100644 index 000000000..80cda9f87 --- /dev/null +++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Extension/ImageFormatExtension.cs @@ -0,0 +1,121 @@ +// Copyright (c) 2023 homuler +// +// Use of this source code is governed by an MIT-style +// license that can be found in the LICENSE file or at +// https://opensource.org/licenses/MIT. + +namespace Mediapipe +{ + public static class ImageFormatExtension + { + /// <summary> + /// The number of channels for a <paramref name="format" />. + /// If channels don't make sense in the <paramref name="format" />, returns 0. + /// </summary> + /// <remarks> + /// Unlike the original implementation, this API won't signal SIGABRT. 
+ /// </remarks> + public static int NumberOfChannels(this ImageFormat.Types.Format format) + { + switch (format) + { + case ImageFormat.Types.Format.Srgb: + case ImageFormat.Types.Format.Srgb48: + return 3; + case ImageFormat.Types.Format.Srgba: + case ImageFormat.Types.Format.Srgba64: + case ImageFormat.Types.Format.Sbgra: + return 4; + case ImageFormat.Types.Format.Gray8: + case ImageFormat.Types.Format.Gray16: + return 1; + case ImageFormat.Types.Format.Vec32F1: + return 1; + case ImageFormat.Types.Format.Vec32F2: + return 2; + case ImageFormat.Types.Format.Vec32F4: + return 4; + case ImageFormat.Types.Format.Lab8: + return 3; + case ImageFormat.Types.Format.Ycbcr420P: + case ImageFormat.Types.Format.Ycbcr420P10: + case ImageFormat.Types.Format.Unknown: + default: + return 0; + } + } + + /// <summary> + /// The depth of each channel in bytes for a <paramref name="format" />. + /// If channels don't make sense in the <paramref name="format" />, returns 0. + /// </summary> + /// <remarks> + /// Unlike the original implementation, this API won't signal SIGABRT. + /// </remarks> + public static int ByteDepth(this ImageFormat.Types.Format format) + { + switch (format) + { + case ImageFormat.Types.Format.Srgb: + case ImageFormat.Types.Format.Srgba: + case ImageFormat.Types.Format.Sbgra: + return 1; + case ImageFormat.Types.Format.Srgb48: + case ImageFormat.Types.Format.Srgba64: + return 2; + case ImageFormat.Types.Format.Gray8: + return 1; + case ImageFormat.Types.Format.Gray16: + return 2; + case ImageFormat.Types.Format.Vec32F1: + case ImageFormat.Types.Format.Vec32F2: + case ImageFormat.Types.Format.Vec32F4: + return 4; + case ImageFormat.Types.Format.Lab8: + return 1; + case ImageFormat.Types.Format.Ycbcr420P: + case ImageFormat.Types.Format.Ycbcr420P10: + case ImageFormat.Types.Format.Unknown: + default: + return 0; + } + } + + /// <summary> + /// The channel size in bytes for a <paramref name="format" />. + /// If channels don't make sense in the <paramref name="format" />, returns 0. + /// </summary> + /// <remarks> + /// Unlike the original implementation, this API won't signal SIGABRT. 
+ /// </remarks> + public static int ChannelSize(this ImageFormat.Types.Format format) + { + switch (format) + { + case ImageFormat.Types.Format.Srgb: + case ImageFormat.Types.Format.Srgba: + case ImageFormat.Types.Format.Sbgra: + return sizeof(byte); + case ImageFormat.Types.Format.Srgb48: + case ImageFormat.Types.Format.Srgba64: + return sizeof(ushort); + case ImageFormat.Types.Format.Gray8: + return sizeof(byte); + case ImageFormat.Types.Format.Gray16: + return sizeof(ushort); + case ImageFormat.Types.Format.Vec32F1: + case ImageFormat.Types.Format.Vec32F2: + case ImageFormat.Types.Format.Vec32F4: + // In C#, sizeof(float) is always 4. The native side's float size is platform-dependent in principle, but we assume it's the same on all supported platforms. + return sizeof(float); + case ImageFormat.Types.Format.Lab8: + return sizeof(byte); + case ImageFormat.Types.Format.Ycbcr420P: + case ImageFormat.Types.Format.Ycbcr420P10: + case ImageFormat.Types.Format.Unknown: + default: + return 0; + } + } + } +} diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Extension/ImageFormatExtension.cs.meta b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Extension/ImageFormatExtension.cs.meta new file mode 100644 index 000000000..18a31fea4 --- /dev/null +++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Extension/ImageFormatExtension.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 552e7f7786f508d5e8a23edf02fd5f14 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Framework/Formats/Image.cs b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Framework/Formats/Image.cs index 419b8f78e..61ea18ec3 100644 --- a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Framework/Formats/Image.cs +++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Framework/Formats/Image.cs @@ -7,6 +7,7 @@ 
using System; using Unity.Collections; using Unity.Collections.LowLevel.Unsafe; +using UnityEngine; namespace Mediapipe { @@ -43,6 +44,11 @@ public Image(ImageFormat.Types.Format format, int width, int height, int widthSt : this(format, width, height, widthStep, pixelData, _VoidDeleter) { } + // TODO: detect format from the texture. widthStep is the row size in bytes (channels * bytes per channel * width), assuming tightly packed rows. + public Image(ImageFormat.Types.Format format, Texture2D texture) : + this(format, texture.width, texture.height, format.NumberOfChannels() * format.ByteDepth() * texture.width, texture.GetRawTextureData()) + { } + #if UNITY_EDITOR_LINUX || UNITY_STANDLONE_LINUX || UNITY_ANDROID public Image(uint target, uint name, int width, int height, GpuBufferFormat format, GlTextureBuffer.DeletionCallback callback, GlContext glContext) : base() { diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Framework/Formats/ImageFrame.cs b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Framework/Formats/ImageFrame.cs index 1d930d247..8df86dbea 100644 --- a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Framework/Formats/ImageFrame.cs +++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Framework/Formats/ImageFrame.cs @@ -7,6 +7,7 @@ using System; using Unity.Collections; using Unity.Collections.LowLevel.Unsafe; +using UnityEngine; namespace Mediapipe { @@ -65,6 +66,11 @@ public ImageFrame(ImageFormat.Types.Format format, int width, int height, int wi : this(format, width, height, widthStep, pixelData, _VoidDeleter) { } + // TODO: detect format from the texture. widthStep is the row size in bytes (channels * bytes per channel * width), assuming tightly packed rows. + public ImageFrame(ImageFormat.Types.Format format, Texture2D texture) : + this(format, texture.width, texture.height, format.NumberOfChannels() * format.ByteDepth() * texture.width, texture.GetRawTextureData()) + { } + protected override void DeleteMpPtr() { UnsafeNativeMethods.mp_ImageFrame__delete(ptr); @@ -75,112 +81,6 @@ protected override void DeleteMpPtr() [AOT.MonoPInvokeCallback(typeof(Deleter))] internal static void VoidDeleter(IntPtr _) { } - /// - /// The number of channels for a . 
- /// If channels don't make sense in the , returns 0. - /// - /// - /// Unlike the original implementation, this API won't signal SIGABRT. - /// - public static int NumberOfChannelsForFormat(ImageFormat.Types.Format format) - { - switch (format) - { - case ImageFormat.Types.Format.Srgb: - case ImageFormat.Types.Format.Srgb48: - return 3; - case ImageFormat.Types.Format.Srgba: - case ImageFormat.Types.Format.Srgba64: - case ImageFormat.Types.Format.Sbgra: - return 4; - case ImageFormat.Types.Format.Gray8: - case ImageFormat.Types.Format.Gray16: - return 1; - case ImageFormat.Types.Format.Vec32F1: - return 1; - case ImageFormat.Types.Format.Vec32F2: - return 2; - case ImageFormat.Types.Format.Lab8: - return 3; - case ImageFormat.Types.Format.Ycbcr420P: - case ImageFormat.Types.Format.Ycbcr420P10: - case ImageFormat.Types.Format.Unknown: - default: - return 0; - } - } - - /// - /// The channel size for a . - /// If channels don't make sense in the , returns 0. - /// - /// - /// Unlike the original implementation, this API won't signal SIGABRT. - /// - public static int ChannelSizeForFormat(ImageFormat.Types.Format format) - { - switch (format) - { - case ImageFormat.Types.Format.Srgb: - case ImageFormat.Types.Format.Srgba: - case ImageFormat.Types.Format.Sbgra: - return sizeof(byte); - case ImageFormat.Types.Format.Srgb48: - case ImageFormat.Types.Format.Srgba64: - return sizeof(ushort); - case ImageFormat.Types.Format.Gray8: - return sizeof(byte); - case ImageFormat.Types.Format.Gray16: - return sizeof(ushort); - case ImageFormat.Types.Format.Vec32F1: - case ImageFormat.Types.Format.Vec32F2: - // sizeof float may be wrong since it's platform-dependent, but we assume that it's constant across all supported platforms. 
- return sizeof(float); - case ImageFormat.Types.Format.Lab8: - return sizeof(byte); - case ImageFormat.Types.Format.Ycbcr420P: - case ImageFormat.Types.Format.Ycbcr420P10: - case ImageFormat.Types.Format.Unknown: - default: - return 0; - } - } - - /// - /// The depth of each channel in bytes for a . - /// If channels don't make sense in the , returns 0. - /// - /// - /// Unlike the original implementation, this API won't signal SIGABRT. - /// - public static int ByteDepthForFormat(ImageFormat.Types.Format format) - { - switch (format) - { - case ImageFormat.Types.Format.Srgb: - case ImageFormat.Types.Format.Srgba: - case ImageFormat.Types.Format.Sbgra: - return 1; - case ImageFormat.Types.Format.Srgb48: - case ImageFormat.Types.Format.Srgba64: - return 2; - case ImageFormat.Types.Format.Gray8: - return 1; - case ImageFormat.Types.Format.Gray16: - return 2; - case ImageFormat.Types.Format.Vec32F1: - case ImageFormat.Types.Format.Vec32F2: - return 4; - case ImageFormat.Types.Format.Lab8: - return 1; - case ImageFormat.Types.Format.Ycbcr420P: - case ImageFormat.Types.Format.Ycbcr420P10: - case ImageFormat.Types.Format.Unknown: - default: - return 0; - } - } - public bool IsEmpty() { return SafeNativeMethods.mp_ImageFrame__IsEmpty(mpPtr); @@ -223,7 +123,7 @@ public int Height() /// public int ChannelSize() { - return ChannelSizeForFormat(Format()); + return Format().ChannelSize(); } /// @@ -235,7 +135,7 @@ public int ChannelSize() /// public int NumberOfChannels() { - return NumberOfChannelsForFormat(Format()); + return Format().NumberOfChannels(); } /// @@ -247,7 +147,7 @@ public int NumberOfChannels() /// public int ByteDepth() { - return ByteDepthForFormat(Format()); + return Format().ByteDepth(); } public int WidthStep() diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Unity/Experimental/TextureFrame.cs b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Unity/Experimental/TextureFrame.cs index c98025673..176b0b5cd 100644 --- 
a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Unity/Experimental/TextureFrame.cs +++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Unity/Experimental/TextureFrame.cs @@ -222,9 +222,9 @@ public IntPtr GetNativeTexturePtr() public Guid GetInstanceID() => _instanceId; - public ImageFrame BuildImageFrame() => new ImageFrame(imageFormat, width, height, 4 * width, GetRawTextureData()); + public ImageFrame BuildImageFrame() => new ImageFrame(imageFormat, _texture); - public Image BuildCPUImage() => new Image(imageFormat, width, height, 4 * width, GetRawTextureData()); + public Image BuildCPUImage() => new Image(imageFormat, _texture); public GpuBuffer BuildGpuBuffer(GlContext glContext) { diff --git a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Unity/Extension/ImageFrameExtension.cs b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Unity/Extension/ImageFrameExtension.cs index 2ce007bbd..9d0393eb1 100644 --- a/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Unity/Extension/ImageFrameExtension.cs +++ b/Packages/com.github.homuler.mediapipe/Runtime/Scripts/Unity/Extension/ImageFrameExtension.cs @@ -62,7 +62,7 @@ public static bool TryReadChannel(this Image image, int channelNumber, byte[] ch var format = image.ImageFormat(); using (var pixelWriteLock = new PixelWriteLock(image)) { - return TryReadChannel(format, image.Width(), image.Height(), image.Step(), ImageFrame.ByteDepthForFormat(format), + return TryReadChannel(format, image.Width(), image.Height(), image.Step(), format.ByteDepth(), channelNumber, pixelWriteLock.Pixels(), channelData, isHorizontallyFlipped, isVerticallyFlipped); } } @@ -70,7 +70,7 @@ public static bool TryReadChannel(this Image image, int channelNumber, byte[] ch private static bool TryReadChannel(ImageFormat.Types.Format format, int width, int height, int widthStep, int byteDepth, int channelNumber, IntPtr pixelData, byte[] channelData, bool isHorizontallyFlipped, bool isVerticallyFlipped) { - var 
channelCount = ImageFrame.NumberOfChannelsForFormat(format); + var channelCount = format.NumberOfChannels(); if (!IsChannelNumberValid(channelCount, channelNumber)) { return false; @@ -143,7 +143,7 @@ public static bool TryReadChannel(this Image image, int channelNumber, ushort[] var format = image.ImageFormat(); using (var pixelWriteLock = new PixelWriteLock(image)) { - return TryReadChannel(format, image.Width(), image.Height(), image.Step(), ImageFrame.ByteDepthForFormat(format), + return TryReadChannel(format, image.Width(), image.Height(), image.Step(), format.ByteDepth(), channelNumber, pixelWriteLock.Pixels(), channelData, isHorizontallyFlipped, isVerticallyFlipped); } } @@ -151,7 +151,7 @@ public static bool TryReadChannel(this Image image, int channelNumber, ushort[] private static bool TryReadChannel(ImageFormat.Types.Format format, int width, int height, int widthStep, int byteDepth, int channelNumber, IntPtr pixelData, ushort[] channelData, bool isHorizontallyFlipped, bool isVerticallyFlipped) { - var channelCount = ImageFrame.NumberOfChannelsForFormat(format); + var channelCount = format.NumberOfChannels(); if (!IsChannelNumberValid(channelCount, channelNumber)) { return false; @@ -220,7 +220,7 @@ public static bool TryReadChannel(this Image image, int channelNumber, float[] c var format = image.ImageFormat(); using (var pixelWriteLock = new PixelWriteLock(image)) { - return TryReadChannel(format, image.Width(), image.Height(), image.Step(), ImageFrame.ByteDepthForFormat(format), + return TryReadChannel(format, image.Width(), image.Height(), image.Step(), format.ByteDepth(), channelNumber, pixelWriteLock.Pixels(), channelData, isHorizontallyFlipped, isVerticallyFlipped); } } @@ -228,7 +228,7 @@ public static bool TryReadChannel(this Image image, int channelNumber, float[] c private static bool TryReadChannel(ImageFormat.Types.Format format, int width, int height, int widthStep, int byteDepth, int channelNumber, IntPtr pixelData, float[] channelData, 
bool isHorizontallyFlipped, bool isVerticallyFlipped) { - var channelCount = ImageFrame.NumberOfChannelsForFormat(format); + var channelCount = format.NumberOfChannels(); if (!IsChannelNumberValid(channelCount, channelNumber)) { return false; @@ -296,7 +296,7 @@ public static bool TryReadChannelNormalized(this Image image, int channelNumber, var format = image.ImageFormat(); using (var pixelWriteLock = new PixelWriteLock(image)) { - return TryReadChannelNormalized(format, image.Width(), image.Height(), image.Step(), ImageFrame.ByteDepthForFormat(format), + return TryReadChannelNormalized(format, image.Width(), image.Height(), image.Step(), format.ByteDepth(), channelNumber, pixelWriteLock.Pixels(), normalizedChannelData, isHorizontallyFlipped, isVerticallyFlipped); } } @@ -304,7 +304,7 @@ public static bool TryReadChannelNormalized(this Image image, int channelNumber, private static bool TryReadChannelNormalized(ImageFormat.Types.Format format, int width, int height, int widthStep, int byteDepth, int channelNumber, IntPtr pixelData, float[] normalizedChannelData, bool isHorizontallyFlipped, bool isVerticallyFlipped) { - var channelCount = ImageFrame.NumberOfChannelsForFormat(format); + var channelCount = format.NumberOfChannels(); if (!IsChannelNumberValid(channelCount, channelNumber)) { return false; @@ -341,7 +341,7 @@ public static bool TryReadPixelData(this Image image, Color32[] colors) var format = image.ImageFormat(); using (var pixelWriteLock = new PixelWriteLock(image)) { - return TryReadPixelData(format, image.Width(), image.Height(), image.Step(), ImageFrame.ByteDepthForFormat(format), pixelWriteLock.Pixels(), colors); + return TryReadPixelData(format, image.Width(), image.Height(), image.Step(), format.ByteDepth(), pixelWriteLock.Pixels(), colors); } } diff --git a/Packages/com.github.homuler.mediapipe/Tests/EditMode/Framework/PacketTest.cs b/Packages/com.github.homuler.mediapipe/Tests/EditMode/Framework/PacketTest.cs index 771f4d21b..0cdd3500b 100644 
--- a/Packages/com.github.homuler.mediapipe/Tests/EditMode/Framework/PacketTest.cs +++ b/Packages/com.github.homuler.mediapipe/Tests/EditMode/Framework/PacketTest.cs @@ -440,7 +440,7 @@ private void AssertImage(Image image, int width, int height, ImageFormat.Types.F using (var pixelLock = new PixelWriteLock(image)) { - var pixelData = new byte[width * height * ImageFrame.NumberOfChannelsForFormat(format)]; + var pixelData = new byte[width * height * format.NumberOfChannels()]; Marshal.Copy(pixelLock.Pixels(), pixelData, 0, pixelData.Length); Assert.AreEqual(expectedBytes, pixelData); @@ -453,7 +453,7 @@ private void AssertImageFrame(ImageFrame imageFrame, int width, int height, Imag Assert.AreEqual(height, imageFrame.Height()); Assert.AreEqual(format, imageFrame.Format()); - var pixelData = new byte[width * height * ImageFrame.NumberOfChannelsForFormat(format)]; + var pixelData = new byte[width * height * format.NumberOfChannels()]; Marshal.Copy(imageFrame.MutablePixelData(), pixelData, 0, pixelData.Length); Assert.AreEqual(expectedBytes, pixelData);