Merge pull request #33 from skumra/release/0.3.0

Release/0.3.0
skumra · May 21, 2021 · bdd4936 · bdd4936
2 parents 08efed7 + 5356954
commit bdd4936
Show file tree

Hide file tree

Showing 13 changed files with 152 additions and 21 deletions.
diff --git a/.gitattributes b/.gitattributes
@@ -1,3 +1,6 @@
+.92 filter=lfs diff=lfs merge=lfs -text
+.93 filter=lfs diff=lfs merge=lfs -text
+.94 filter=lfs diff=lfs merge=lfs -text
 .96 filter=lfs diff=lfs merge=lfs -text
 .97 filter=lfs diff=lfs merge=lfs -text
 .98 filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
@@ -14,12 +14,13 @@ Sulabh Kumra, Shirin Joshi, Ferat Sahin
 If you use this project in your research or wish to refer to the baseline results published in the paper, please use the following BibTeX entry:
 
 ```
-@inproceedings{kumra2019antipodal,
-  title={Antipodal Robotic Grasping using Generative Residual Convolutional Neural Network},
+@inproceedings{kumra2020antipodal,
   author={Kumra, Sulabh and Joshi, Shirin and Sahin, Ferat},
-  booktitle={2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
+  booktitle={2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, 
+  title={Antipodal Robotic Grasping using Generative Residual Convolutional Neural Network}, 
   year={2020},
-  organization={IEEE}
+  pages={9626-9633},
+  doi={10.1109/IROS45743.2020.9340777}
 }
 ```
 
@@ -78,22 +79,34 @@ This repository supports both the [Cornell Grasping Dataset](http://pr.cs.cornel
 
 A model can be trained using the `train_network.py` script.  Run `train_network.py --help` to see a full list of options.
 
-For example:
+Example for Cornell dataset:
 
 ```bash
 python train_network.py --dataset cornell --dataset-path <Path To Dataset> --description training_cornell
 ```
 
+Example for Jacquard dataset:
+
+```bash
+python train_network.py --dataset jacquard --dataset-path <Path To Dataset> --description training_jacquard --use-dropout 0 --input-size 300
+```
+
 ## Model Evaluation
 
 The trained network can be evaluated using the `evaluate.py` script.  Run `evaluate.py --help` for a full set of options.
 
-For example:
+Example for Cornell dataset:
 
 ```bash
 python evaluate.py --network <Path to Trained Network> --dataset cornell --dataset-path <Path to Dataset> --iou-eval
 ```
 
+Example for Jacquard dataset:
+
+```bash
+python evaluate.py --network <Path to Trained Network> --dataset jacquard --dataset-path <Path to Dataset> --iou-eval --use-dropout 0 --input-size 300
+```
+
 ## Run Tasks
 A task can be executed using the relevant run script. All task scripts are named `run_<task name>.py`. For example, to run the grasp generator:
 ```bash

diff --git a/evaluate.py b/evaluate.py
@@ -20,6 +20,8 @@ def parse_args():
     # Network
     parser.add_argument('--network', metavar='N', type=str, nargs='+',
                         help='Path to saved networks to evaluate')
+    parser.add_argument('--input-size', type=int, default=224,
+                        help='Input image size for the network')
 
     # Dataset
     parser.add_argument('--dataset', type=str,
@@ -79,6 +81,7 @@ def parse_args():
     logging.info('Loading {} Dataset...'.format(args.dataset.title()))
     Dataset = get_dataset(args.dataset)
     test_dataset = Dataset(args.dataset_path,
+                           output_size=args.input_size,
                            ds_rotate=args.ds_rotate,
                            random_rotate=args.augment,
                            random_zoom=args.augment,

diff --git a/train_network.py b/train_network.py
@@ -27,6 +27,8 @@ def parse_args():
     # Network
     parser.add_argument('--network', type=str, default='grconvnet3',
                         help='Network name in inference/models')
+    parser.add_argument('--input-size', type=int, default=224,
+                        help='Input image size for the network')
     parser.add_argument('--use-depth', type=int, default=1,
                         help='Use Depth image for training (1/0)')
     parser.add_argument('--use-rgb', type=int, default=1,
@@ -37,6 +39,8 @@ def parse_args():
                         help='Dropout prob for training (0-1)')
     parser.add_argument('--channel-size', type=int, default=32,
                         help='Internal channel size for the network')
+    parser.add_argument('--iou-threshold', type=float, default=0.25,
+                        help='Threshold for IOU matching')
 
     # Datasets
     parser.add_argument('--dataset', type=str,
@@ -55,7 +59,7 @@ def parse_args():
     # Training
     parser.add_argument('--batch-size', type=int, default=8,
                         help='Batch size')
-    parser.add_argument('--epochs', type=int, default=30,
+    parser.add_argument('--epochs', type=int, default=50,
                         help='Training epochs')
     parser.add_argument('--batches-per-epoch', type=int, default=1000,
                         help='Batches per Epoch')
@@ -78,12 +82,13 @@ def parse_args():
     return args
 
 
-def validate(net, device, val_data):
+def validate(net, device, val_data, iou_threshold):
     """
     Run validation.
     :param net: Network
     :param device: Torch device
     :param val_data: Validation Dataset
+    :param iou_threshold: IoU threshold
     :return: Successes, Failures and Losses
     """
     net.eval()
@@ -121,6 +126,7 @@ def validate(net, device, val_data):
                                                val_data.dataset.get_gtbb(didx, rot, zoom_factor),
                                                no_grasps=1,
                                                grasp_width=w_out,
+                                               threshold=iou_threshold
                                                )
 
             if s:
@@ -241,6 +247,7 @@ def run():
     logging.info('Loading {} Dataset...'.format(args.dataset.title()))
     Dataset = get_dataset(args.dataset)
     dataset = Dataset(args.dataset_path,
+                      output_size=args.input_size,
                       ds_rotate=args.ds_rotate,
                       random_rotate=True,
                       random_zoom=True,
@@ -298,10 +305,10 @@ def run():
         raise NotImplementedError('Optimizer {} is not implemented'.format(args.optim))
 
     # Print model architecture.
-    summary(net, (input_channels, 224, 224))
+    summary(net, (input_channels, args.input_size, args.input_size))
     f = open(os.path.join(save_folder, 'arch.txt'), 'w')
     sys.stdout = f
-    summary(net, (input_channels, 224, 224))
+    summary(net, (input_channels, args.input_size, args.input_size))
     sys.stdout = sys.__stdout__
     f.close()
 
@@ -317,7 +324,7 @@ def run():
 
         # Run Validation
         logging.info('Validating...')
-        test_results = validate(net, device, val_data)
+        test_results = validate(net, device, val_data, args.iou_threshold)
         logging.info('%d/%d = %f' % (test_results['correct'], test_results['correct'] + test_results['failed'],
                                      test_results['correct'] / (test_results['correct'] + test_results['failed'])))
 

diff --git a/trained-models/jacquard-d-grconvnet3-drop0-ch32/arch.txt b/trained-models/jacquard-d-grconvnet3-drop0-ch32/arch.txt
@@ -0,0 +1,57 @@
+----------------------------------------------------------------
+        Layer (type)               Output Shape         Param #
+================================================================
+            Conv2d-1         [-1, 32, 224, 224]           2,624
+       BatchNorm2d-2         [-1, 32, 224, 224]              64
+            Conv2d-3         [-1, 64, 112, 112]          32,832
+       BatchNorm2d-4         [-1, 64, 112, 112]             128
+            Conv2d-5          [-1, 128, 56, 56]         131,200
+       BatchNorm2d-6          [-1, 128, 56, 56]             256
+            Conv2d-7          [-1, 128, 56, 56]         147,584
+       BatchNorm2d-8          [-1, 128, 56, 56]             256
+            Conv2d-9          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-10          [-1, 128, 56, 56]             256
+    ResidualBlock-11          [-1, 128, 56, 56]               0
+           Conv2d-12          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-13          [-1, 128, 56, 56]             256
+           Conv2d-14          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-15          [-1, 128, 56, 56]             256
+    ResidualBlock-16          [-1, 128, 56, 56]               0
+           Conv2d-17          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-18          [-1, 128, 56, 56]             256
+           Conv2d-19          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-20          [-1, 128, 56, 56]             256
+    ResidualBlock-21          [-1, 128, 56, 56]               0
+           Conv2d-22          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-23          [-1, 128, 56, 56]             256
+           Conv2d-24          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-25          [-1, 128, 56, 56]             256
+    ResidualBlock-26          [-1, 128, 56, 56]               0
+           Conv2d-27          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-28          [-1, 128, 56, 56]             256
+           Conv2d-29          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-30          [-1, 128, 56, 56]             256
+    ResidualBlock-31          [-1, 128, 56, 56]               0
+  ConvTranspose2d-32         [-1, 64, 113, 113]         131,136
+      BatchNorm2d-33         [-1, 64, 113, 113]             128
+  ConvTranspose2d-34         [-1, 32, 225, 225]          32,800
+      BatchNorm2d-35         [-1, 32, 225, 225]              64
+  ConvTranspose2d-36         [-1, 32, 225, 225]          82,976
+          Dropout-37         [-1, 32, 225, 225]               0
+           Conv2d-38          [-1, 1, 224, 224]             129
+          Dropout-39         [-1, 32, 225, 225]               0
+           Conv2d-40          [-1, 1, 224, 224]             129
+          Dropout-41         [-1, 32, 225, 225]               0
+           Conv2d-42          [-1, 1, 224, 224]             129
+          Dropout-43         [-1, 32, 225, 225]               0
+           Conv2d-44          [-1, 1, 224, 224]             129
+================================================================
+Total params: 1,893,124
+Trainable params: 1,893,124
+Non-trainable params: 0
+----------------------------------------------------------------
+Input size (MB): 0.19
+Forward/backward pass size (MB): 219.96
+Params size (MB): 7.22
+Estimated Total Size (MB): 227.37
+----------------------------------------------------------------
diff --git a/trained-models/jacquard-d-grconvnet3-drop0-ch32/epoch_44_iou_0.93 b/trained-models/jacquard-d-grconvnet3-drop0-ch32/epoch_44_iou_0.93
diff --git a/trained-models/jacquard-d-grconvnet3-drop0-ch32/epoch_48_iou_0.93 b/trained-models/jacquard-d-grconvnet3-drop0-ch32/epoch_48_iou_0.93
diff --git a/trained-models/jacquard-d-grconvnet3-drop0-ch32/epoch_50_iou_0.94 b/trained-models/jacquard-d-grconvnet3-drop0-ch32/epoch_50_iou_0.94
diff --git a/trained-models/jacquard-rgbd-grconvnet3-drop0-ch32/arch.txt b/trained-models/jacquard-rgbd-grconvnet3-drop0-ch32/arch.txt
@@ -0,0 +1,53 @@
+----------------------------------------------------------------
+        Layer (type)               Output Shape         Param #
+================================================================
+            Conv2d-1         [-1, 32, 224, 224]          10,400
+       BatchNorm2d-2         [-1, 32, 224, 224]              64
+            Conv2d-3         [-1, 64, 112, 112]          32,832
+       BatchNorm2d-4         [-1, 64, 112, 112]             128
+            Conv2d-5          [-1, 128, 56, 56]         131,200
+       BatchNorm2d-6          [-1, 128, 56, 56]             256
+            Conv2d-7          [-1, 128, 56, 56]         147,584
+       BatchNorm2d-8          [-1, 128, 56, 56]             256
+            Conv2d-9          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-10          [-1, 128, 56, 56]             256
+    ResidualBlock-11          [-1, 128, 56, 56]               0
+           Conv2d-12          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-13          [-1, 128, 56, 56]             256
+           Conv2d-14          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-15          [-1, 128, 56, 56]             256
+    ResidualBlock-16          [-1, 128, 56, 56]               0
+           Conv2d-17          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-18          [-1, 128, 56, 56]             256
+           Conv2d-19          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-20          [-1, 128, 56, 56]             256
+    ResidualBlock-21          [-1, 128, 56, 56]               0
+           Conv2d-22          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-23          [-1, 128, 56, 56]             256
+           Conv2d-24          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-25          [-1, 128, 56, 56]             256
+    ResidualBlock-26          [-1, 128, 56, 56]               0
+           Conv2d-27          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-28          [-1, 128, 56, 56]             256
+           Conv2d-29          [-1, 128, 56, 56]         147,584
+      BatchNorm2d-30          [-1, 128, 56, 56]             256
+    ResidualBlock-31          [-1, 128, 56, 56]               0
+  ConvTranspose2d-32         [-1, 64, 113, 113]         131,136
+      BatchNorm2d-33         [-1, 64, 113, 113]             128
+  ConvTranspose2d-34         [-1, 32, 225, 225]          32,800
+      BatchNorm2d-35         [-1, 32, 225, 225]              64
+  ConvTranspose2d-36         [-1, 32, 225, 225]          82,976
+           Conv2d-37          [-1, 1, 224, 224]             129
+           Conv2d-38          [-1, 1, 224, 224]             129
+           Conv2d-39          [-1, 1, 224, 224]             129
+           Conv2d-40          [-1, 1, 224, 224]             129
+================================================================
+Total params: 1,900,900
+Trainable params: 1,900,900
+Non-trainable params: 0
+----------------------------------------------------------------
+Input size (MB): 0.77
+Forward/backward pass size (MB): 170.52
+Params size (MB): 7.25
+Estimated Total Size (MB): 178.53
+----------------------------------------------------------------
diff --git a/trained-models/jacquard-rgbd-grconvnet3-drop0-ch32/epoch_35_iou_0.92 b/trained-models/jacquard-rgbd-grconvnet3-drop0-ch32/epoch_35_iou_0.92
diff --git a/trained-models/jacquard-rgbd-grconvnet3-drop0-ch32/epoch_42_iou_0.93 b/trained-models/jacquard-rgbd-grconvnet3-drop0-ch32/epoch_42_iou_0.93
diff --git a/trained-models/jacquard-rgbd-grconvnet3-drop0-ch32/epoch_48_iou_0.93 b/trained-models/jacquard-rgbd-grconvnet3-drop0-ch32/epoch_48_iou_0.93
diff --git a/utils/dataset_processing/grasp.py b/utils/dataset_processing/grasp.py
@@ -323,17 +323,14 @@ def scale(self, factor):
             return
         self.points *= factor
 
-    def plot(self, ax, q, color=None):
+    def plot(self, ax, color=None):
         """
         Plot grasping rectangle.
         :param ax: Existing matplotlib axis
-        :param q: Grasp quality
         :param color: matplotlib color code (optional)
         """
         points = np.vstack((self.points, self.points[0]))
         ax.plot(points[:, 1], points[:, 0], color=color)
-        ax.plot(self.center[1], self.center[0], 'o', color=color)
-        ax.legend(['score: {0:.2f}'.format(q)])
 
     def zoom(self, factor, center):
         """
@@ -356,10 +353,9 @@ class Grasp:
     A Grasp represented by a center pixel, rotation angle and gripper width (length)
     """
 
-    def __init__(self, center, angle, quality, length=60, width=30):
+    def __init__(self, center, angle, length=60, width=30):
         self.center = center
         self.angle = angle  # Positive angle means rotate anti-clockwise from horizontal.
-        self.quality = quality
         self.length = length
         self.width = width
 
@@ -405,7 +401,7 @@ def plot(self, ax, color=None):
         :param ax: Existing matplotlib axis
         :param color: (optional) color
         """
-        self.as_gr.plot(ax, self.quality, color)
+        self.as_gr.plot(ax, color)
 
     def to_jacquard(self, scale=1):
         """
@@ -428,16 +424,15 @@ def detect_grasps(q_img, ang_img, width_img=None, no_grasps=1):
     :param no_grasps: Max number of grasps to return
     :return: list of Grasps
     """
-    local_max = peak_local_max(q_img, min_distance=10, threshold_abs=0.02, num_peaks=no_grasps)
+    local_max = peak_local_max(q_img, min_distance=20, threshold_abs=0.2, num_peaks=no_grasps)
 
     grasps = []
     for grasp_point_array in local_max:
         grasp_point = tuple(grasp_point_array)
 
         grasp_angle = ang_img[grasp_point]
-        grasp_quality = q_img[grasp_point]
 
-        g = Grasp(grasp_point, grasp_angle, grasp_quality)
+        g = Grasp(grasp_point, grasp_angle)
         if width_img is not None:
             g.length = width_img[grasp_point]
             g.width = g.length / 2