refactor(example)!: MediaPipe rotates the input image
homuler committed Dec 21, 2020
1 parent a1e4cc0 commit 0eda94c
Showing 19 changed files with 165 additions and 26 deletions.
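Every graph below gains the same ImageTransformationCalculator node (or, where a graph already resized its input through one, just an extra rotation_mode field), so the rotation that the C# code used to apply while copying pixels now happens inside MediaPipe. The literal rotation_mode: 3 is read against MediaPipe's RotationMode enum; the sketch below shows the assumed upstream definition (from mediapipe's rotation_mode.proto, not part of this diff), under which 3 means ROTATION_180.

// Assumed sketch of the upstream enum referenced by rotation_mode: 3;
// values taken from mediapipe's rotation_mode.proto, not from this diff.
message RotationMode {
  enum Mode {
    UNKNOWN = 0;
    ROTATION_0 = 1;
    ROTATION_90 = 2;
    ROTATION_180 = 3;  // the value every graph in this commit sets
    ROTATION_270 = 4;
  }
}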
(file name not captured)
@@ -40,10 +40,21 @@ node {
  output_stream: "throttled_input_video"
}

+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "IMAGE:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
# Subgraph that detects faces.
node {
  calculator: "FaceDetectionFrontCpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
  output_stream: "DETECTIONS:face_detections"
}

(file name not captured)
@@ -40,10 +40,21 @@ node {
  output_stream: "throttled_input_video"
}

+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE_GPU:throttled_input_video"
+  output_stream: "IMAGE_GPU:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
# Subgraph that detects faces.
node {
  calculator: "FaceDetectionFrontGpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
  output_stream: "DETECTIONS:face_detections"
}

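The remaining graph diffs repeat this pattern in CPU/GPU pairs: the GPU variants feed the transformation node through IMAGE_GPU-tagged streams and use the *Gpu subgraphs, while the CPU variants use IMAGE-tagged streams and the *Cpu subgraphs; the stream names are otherwise identical.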
13 changes: 12 additions & 1 deletion Assets/MediaPipe/Examples/Resources/face_mesh_desktop_live.txt
@@ -47,10 +47,21 @@ node {
  output_stream: "throttled_input_video"
}

+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "IMAGE:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
# Subgraph that detects faces and corresponding landmarks.
node {
  calculator: "FaceLandmarkFrontCpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
  input_side_packet: "NUM_FACES:num_faces"
  output_stream: "LANDMARKS:multi_face_landmarks"
  output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
13 changes: 12 additions & 1 deletion Assets/MediaPipe/Examples/Resources/face_mesh_mobile.txt
@@ -50,10 +50,21 @@ node {
  output_stream: "throttled_input_video"
}

+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE_GPU:throttled_input_video"
+  output_stream: "IMAGE_GPU:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
# Subgraph that detects faces and corresponding landmarks.
node {
  calculator: "FaceLandmarkFrontGpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
  input_side_packet: "NUM_FACES:num_faces"
  output_stream: "LANDMARKS:multi_face_landmarks"
  output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
(file name not captured)
@@ -56,6 +56,7 @@ node: {
    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
      output_width: 512
      output_height: 512
+      rotation_mode: 3
    }
  }
}
(file name not captured)
@@ -34,10 +34,21 @@ output_stream: "hand_rects_from_palm_detections"
output_stream: "hand_landmarks_presence"
output_stream: "palm_detections_presence"

+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE:input_video"
+  output_stream: "IMAGE:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
# Detects/tracks hand landmarks.
node {
  calculator: "HandLandmarkTrackingCpu"
-  input_stream: "IMAGE:input_video"
+  input_stream: "IMAGE:transformed_input_video"
  input_side_packet: "NUM_HANDS:num_hands"
  output_stream: "LANDMARKS:hand_landmarks"
  output_stream: "HANDEDNESS:handedness"
13 changes: 12 additions & 1 deletion Assets/MediaPipe/Examples/Resources/hand_tracking_mobile.txt
@@ -54,10 +54,21 @@ node {
  output_stream: "throttled_input_video"
}

+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE_GPU:throttled_input_video"
+  output_stream: "IMAGE_GPU:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
# Detects/tracks hand landmarks.
node {
  calculator: "HandLandmarkTrackingGpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
  input_side_packet: "NUM_HANDS:num_hands"
  output_stream: "LANDMARKS:hand_landmarks"
  output_stream: "HANDEDNESS:handedness"
15 changes: 13 additions & 2 deletions Assets/MediaPipe/Examples/Resources/iris_tracking_cpu.txt
@@ -38,10 +38,21 @@ node {
  }
}

+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE:input_video"
+  output_stream: "IMAGE:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
# Detects faces and corresponding landmarks.
node {
  calculator: "FaceLandmarkFrontCpu"
-  input_stream: "IMAGE:input_video"
+  input_stream: "IMAGE:transformed_input_video"
  input_side_packet: "NUM_FACES:num_faces"
  output_stream: "LANDMARKS:multi_face_landmarks"
  output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
@@ -107,7 +118,7 @@ node {
# Detects iris landmarks, eye contour landmarks, and corresponding rect (ROI).
node {
  calculator: "IrisLandmarkLeftAndRightCpu"
-  input_stream: "IMAGE:input_video"
+  input_stream: "IMAGE:transformed_input_video"
  input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks"
  input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks"
  output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks"
15 changes: 13 additions & 2 deletions Assets/MediaPipe/Examples/Resources/iris_tracking_gpu.txt
@@ -46,6 +46,17 @@ node {
  output_stream: "throttled_input_video"
}

+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE_GPU:throttled_input_video"
+  output_stream: "IMAGE_GPU:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
# Defines how many faces to detect. Iris tracking currently only handles one
# face (left and right eye), and therefore this should always be set to 1.
node {
@@ -61,7 +72,7 @@ node {
# Detects faces and corresponding landmarks.
node {
  calculator: "FaceLandmarkFrontGpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
  input_side_packet: "NUM_FACES:num_faces"
  output_stream: "LANDMARKS:multi_face_landmarks"
  output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
@@ -127,7 +138,7 @@ node {
# Detects iris landmarks, eye contour landmarks, and corresponding rect (ROI).
node {
  calculator: "IrisLandmarkLeftAndRightGpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
  input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks"
  input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks"
  output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks"
(file name not captured)
@@ -56,6 +56,7 @@ node: {
    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
      output_width: 320
      output_height: 320
+      rotation_mode: 3
    }
  }
}
(file name not captured)
@@ -57,6 +57,7 @@ node: {
    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
      output_width: 320
      output_height: 320
+      rotation_mode: 3
    }
  }
}
(file name not captured)
@@ -42,10 +42,21 @@ node {
  output_stream: "throttled_input_video"
}

+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "IMAGE:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
# Subgraph that detects poses and corresponding landmarks.
node {
  calculator: "PoseLandmarkUpperBodyCpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
  output_stream: "LANDMARKS:pose_landmarks"
  output_stream: "DETECTION:pose_detection"
  output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
@@ -54,7 +65,7 @@ node {
# Calculates size of the image.
node {
  calculator: "ImagePropertiesCalculator"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
  output_stream: "SIZE:image_size"
}

(file name not captured)
@@ -42,10 +42,21 @@ node {
  output_stream: "throttled_input_video"
}

+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE_GPU:throttled_input_video"
+  output_stream: "IMAGE_GPU:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
# Subgraph that detects poses and corresponding landmarks.
node {
  calculator: "PoseLandmarkUpperBodyGpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
  output_stream: "LANDMARKS:pose_landmarks"
  output_stream: "DETECTION:pose_detection"
  output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
@@ -54,7 +65,7 @@ node {
# Calculates size of the image.
node {
  calculator: "ImagePropertiesCalculator"
-  input_stream: "IMAGE_GPU:throttled_input_video"
+  input_stream: "IMAGE_GPU:transformed_input_video"
  output_stream: "SIZE:image_size"
}

16 changes: 15 additions & 1 deletion Assets/MediaPipe/Examples/Scenes/DesktopCPU.unity
@@ -824,6 +824,7 @@ GameObject:
  - component: {fileID: 496037461}
  - component: {fileID: 496037460}
  - component: {fileID: 496037459}
+  - component: {fileID: 496037464}
  m_Layer: 0
  m_Name: WebCamScreen
  m_TagString: Untagged
@@ -846,7 +847,7 @@ MonoBehaviour:
  DefaultWidth: 640
  DefaultHeight: 480
  FPS: 30
-  focalLengthPx: 2
+  DefaultFocalLengthPx: 2
--- !u!64 &496037460
MeshCollider:
  m_ObjectHideFlags: 0
@@ -922,6 +923,19 @@ Transform:
  m_Father: {fileID: 0}
  m_RootOrder: 2
  m_LocalEulerAnglesHint: {x: 90, y: 180, z: 0}
+--- !u!114 &496037464
+MonoBehaviour:
+  m_ObjectHideFlags: 0
+  m_CorrespondingSourceObject: {fileID: 0}
+  m_PrefabInstance: {fileID: 0}
+  m_PrefabAsset: {fileID: 0}
+  m_GameObject: {fileID: 496037458}
+  m_Enabled: 1
+  m_EditorHideFlags: 0
+  m_Script: {fileID: 11500000, guid: d5da564da19cb6b7d8e4f97f269edc5d, type: 3}
+  m_Name:
+  m_EditorClassIdentifier:
+  poolSize: 20
--- !u!1 &624144232
GameObject:
  m_ObjectHideFlags: 0
15 changes: 4 additions & 11 deletions Assets/MediaPipe/Examples/Scripts/DemoGraph.cs
@@ -39,7 +39,8 @@ public abstract class DemoGraph : MonoBehaviour, IDemoGraph<TextureFrame> {
    ImageFrame imageFrame = null;

    if (!IsGpuEnabled()) {
-      imageFrame = CopyPixelsFrom(textureFrame);
+      imageFrame = new ImageFrame(
+        ImageFormat.Format.SRGBA, textureFrame.width, textureFrame.height, 4 * textureFrame.width, textureFrame.GetRawNativeByteArray());
      var packet = new ImageFramePacket(imageFrame, timestamp);

      return graph.AddPacketToInputStream(inputStream, packet);
@@ -53,16 +54,12 @@ public abstract class DemoGraph : MonoBehaviour, IDemoGraph<TextureFrame> {
      var glTextureBuffer = new GlTextureBuffer((UInt32)glTextureName, textureFrame.width, textureFrame.height,
        textureFrame.gpuBufferformat, textureFrame.OnRelease, glContext);
      var gpuBuffer = new GpuBuffer(glTextureBuffer);
-      var texture = gpuHelper.CreateSourceTexture(gpuBuffer);
-      var gpuFrame = texture.GetGpuBufferFrame();
-      Gl.Flush();
-      texture.Release();
      return graph.AddPacketToInputStream(inputStream, new GpuBufferPacket(gpuBuffer, timestamp));
    });
#else
-    imageFrame = CopyPixelsFrom(textureFrame);
+    imageFrame = new ImageFrame(
+      ImageFormat.Format.SRGBA, textureFrame.width, textureFrame.height, 4 * textureFrame.width, textureFrame.GetRawNativeByteArray());

    return gpuHelper.RunInGlContext(() => {
      var texture = gpuHelper.CreateSourceTexture(imageFrame);
@@ -76,10 +73,6 @@ public abstract class DemoGraph : MonoBehaviour, IDemoGraph<TextureFrame> {
#endif
  }

-  private ImageFrame CopyPixelsFrom(TextureFrame textureFrame) {
-    return ImageFrame.FromPixels32(textureFrame.GetPixels32(), textureFrame.width, textureFrame.height, true);
-  }
-
  public abstract void RenderOutput(WebCamScreenController screenController, TextureFrame textureFrame);

  public void Stop() {
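The DemoGraph.cs diff is the Unity-side half of the change: the deleted CopyPixelsFrom helper built the ImageFrame through GetPixels32, a managed pixel copy whose boolean argument appears to have reoriented the image in C#, whereas the new code wraps the TextureFrame's raw native buffer directly (SRGBA, 4 bytes per pixel, hence the 4 * width row stride) and, on the GPU path, packs the GpuBuffer into the packet without the CreateSourceTexture/GetGpuBufferFrame round-trip. Since orientation is now applied by the graphs themselves, this is presumably what makes the commit breaking (the ! in refactor(example)!): a graph without the new ImageTransformationCalculator node will now receive frames in their original orientation.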
(remaining changed files not shown)
