
Commit

add multiclass-batch inference and refactor dataset
jhultman committed Mar 10, 2020
1 parent 575c555 commit 0789508
Showing 16 changed files with 473 additions and 234 deletions.
5 changes: 3 additions & 2 deletions .gitignore
@@ -12,9 +12,10 @@ build/*
dist/*
pvrcnn.egg-info/*

notebooks/*
.ipynb_checkpoints/*

pvrcnn/eval/*
pvrcnn/core/viz_detections.py
pvrcnn/make_dataset.py

images/proposals_*
notebooks/*
3 changes: 2 additions & 1 deletion configs/second/car.yaml
@@ -6,11 +6,12 @@ ANCHORS: [{
wlh: [1.6, 3.9, 1.56],
yaw: [0, 1.501],
iou_thresh: [0.45, 0.60],
score_thresh: 0.3,
center_z: -1.0,
}]
NUM_CLASSES: 1
TRAIN:
BATCH_SIZE: 5
BATCH_SIZE: 4
LAMBDA: 1.0
EPOCHS: 60
AUG:
84 changes: 84 additions & 0 deletions pvrcnn/core/bev_drawer.py
@@ -0,0 +1,84 @@
import cv2
import numpy as np

from .geometry import box3d_to_bev_corners


def clipped_percentile(x, p=1):
"""Transform to unit interval robustly."""
p0, p1 = np.percentile(x, [p, 100 - p])
x = (np.clip(x, p0, p1) - p0) / (p1 - p0 + 1e-1)
return x


def make_bev_map(points, pixel_size, bounds):
"""Scatter points to create sparse occupancy image."""
mask = ((points > bounds[:2]) & (points < bounds[2:])).all(1)
shape = np.int32(np.ceil((bounds[2:] - bounds[:2]) / pixel_size))[::-1]
pixels = np.int32(np.floor((points[mask] - bounds[:2]) / pixel_size))
pixels, counts = np.unique(pixels, return_counts=True, axis=0)
bev_map = np.zeros(shape, dtype=np.float32)
bev_map[tuple(pixels[:, ::-1].T)] = counts
bev_map = clipped_percentile(bev_map)
return bev_map


class Drawer:
"""Draw BEV occupancy map with boxes. Store in image attribute."""

def __init__(self,
points,
boxes=[],
labels=[],
pixel_size=np.r_[0.1, 0.1],
bounds=np.r_[0, -30, 60, 30]):
self.pixel_size = pixel_size
self.bounds = bounds
self.line_kw = dict(thickness=2)
self.text_kw = dict(
fontScale=0.6,
thickness=2,
fontFace=cv2.FONT_HERSHEY_SIMPLEX,
)
self.image = self.build_bev(points)
self.draw(boxes, labels)

def build_bev(self, points):
image = make_bev_map(
points[:, :2], self.pixel_size, self.bounds)
image = (image * 255).astype(np.uint8)
image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
return image

def get_text_color(self, n):
color = np.tile(np.r_[255, 0, 0][None], (n, 1))
color = [list(map(int, c)) for c in color]
return color

def get_line_color(self, n):
color = np.tile(np.r_[0, 255, 0][None], (n, 1))
color = [list(int(c) for c in ci) for ci in color]
return color

def draw_text(self, locs, labels):
colors = self.get_text_color(locs.shape[0])
locs = map(tuple, locs.astype(np.int32).tolist())
for label, loc, color in zip(labels, locs, colors):
cv2.putText(self.image, label, loc, color=color, **self.text_kw)

def draw_lines(self, lines):
colors = self.get_line_color(lines.shape[0])
lines = map(tuple, lines.astype(np.int32).tolist())
for line, color in zip(lines, colors):
cv2.line(self.image, line[0:2], line[2:4], color, **self.line_kw)

def draw(self, boxes_, labels):
"""No support yet for labels."""
for boxes in boxes_:
extent = self.bounds[2:] - self.bounds[:2]
factor = np.r_[self.image.shape[:2]][::-1] / extent
corners = (box3d_to_bev_corners(boxes) - self.bounds[:2]) * factor
line_loc = corners[:, [1, 2, 2, 3, 3, 0]].reshape(-1, 4)
text_loc = corners[:, 0] + 0.3
self.draw_lines(line_loc)
self.draw_text(text_loc, labels=[])
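
Aside (not part of the diff): a minimal usage sketch for the new Drawer, with synthetic points and made-up car boxes; the box column order (x, y, z, w, l, h, yaw) follows box3d_to_bev_corners used above.

import cv2
import numpy as np
from pvrcnn.core.bev_drawer import Drawer

# Synthetic scene: uniform points inside the default bounds
# (0..60 m forward, -30..30 m lateral), plus two car-sized boxes.
points = np.random.uniform(
    [0, -30, -2], [60, 30, 1], size=(500000, 3)).astype(np.float32)
boxes = np.array([
    [20.0,  5.0, -1.0, 1.6, 3.9, 1.56, 0.0],
    [35.0, -8.0, -1.0, 1.6, 3.9, 1.56, 0.7],
], dtype=np.float32)

# Drawer expects a list of box arrays (e.g. one per class); labels are
# accepted but not rendered yet, per the docstring.
drawer = Drawer(points, boxes=[boxes], labels=[])
cv2.imwrite('bev_preview.png', drawer.image)
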
36 changes: 36 additions & 0 deletions pvrcnn/core/box_encode.py
@@ -0,0 +1,36 @@
import torch
import math


def _anchor_diagonal(A_wlh):
"""Reference: VoxelNet."""
A_wl, A_h = A_wlh.split([2, 1], -1)
A_norm = A_wl.norm(dim=-1, keepdim=True).expand_as(A_wl)
A_norm = torch.cat((A_norm, A_h), dim=-1)
return A_norm


def decode(deltas, anchors):
"""Both inputs of shape (*, 7)."""
P_xyz, P_wlh, P_yaw = deltas.split([3, 3, 1], -1)
A_xyz, A_wlh, A_yaw = anchors.split([3, 3, 1], -1)
A_norm = _anchor_diagonal(A_wlh)
boxes = torch.cat((
(P_xyz * A_norm + A_xyz),
(P_wlh.exp() * A_wlh),
(P_yaw + A_yaw)), dim=-1
)
return boxes


def encode(boxes, anchors):
"""Both inputs of shape (*, 7)."""
G_xyz, G_wlh, G_yaw = boxes.split([3, 3, 1], -1)
A_xyz, A_wlh, A_yaw = anchors.split([3, 3, 1], -1)
A_norm = _anchor_diagonal(A_wlh)
deltas = torch.cat((
(G_xyz - A_xyz) / A_norm,
(G_wlh / A_wlh).log(),
(G_yaw - A_yaw) % math.pi), dim=-1
)
return deltas
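
Aside (not part of the diff): a minimal round-trip sketch for the new encode/decode helpers, using made-up anchors and boxes. Recovery is exact up to floating point here because the yaw offsets already lie in [0, pi), the range the modulo-pi wrap in encode maps to.

import torch
from pvrcnn.core.box_encode import encode, decode

torch.manual_seed(0)
anchors = torch.tensor([[0.0, 0.0, -1.0, 1.6, 3.9, 1.56, 0.0]]).repeat(8, 1)
boxes = anchors.clone()
boxes[:, 0:3] += torch.randn(8, 3)               # jitter centers
boxes[:, 3:6] *= 0.8 + 0.4 * torch.rand(8, 3)    # scale sizes
boxes[:, 6] = 3.14159 * torch.rand(8)            # yaw in [0, pi)

deltas = encode(boxes, anchors)
recovered = decode(deltas, anchors)
assert torch.allclose(recovered, boxes, atol=1e-5)
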
3 changes: 3 additions & 0 deletions pvrcnn/core/config.py
@@ -25,20 +25,23 @@
'wlh': [1.6, 3.9, 1.56],
'yaw': [0, np.pi / 2],
'iou_thresh': [0.45, 0.60],
'score_thresh': 0.3,
'center_z': -1.0,
},
{
'names': ['Pedestrian', 'Person_sitting'],
'wlh': [0.6, 0.8, 1.73],
'yaw': [0, np.pi / 2],
'iou_thresh': [0.20, 0.35],
'score_thresh': 0.3,
'center_z': -0.6,
},
{
'names': ['Cyclist'],
'wlh': [0.6, 1.76, 1.73],
'yaw': [0, np.pi / 2],
'iou_thresh': [0.20, 0.35],
'score_thresh': 0.3,
'center_z': -0.6,
},
]
65 changes: 65 additions & 0 deletions pvrcnn/core/geometry.py
@@ -0,0 +1,65 @@
import numpy as np


def points_in_convex_polygon(points, polygon, ccw=True):
"""points (N, 2) | polygon (M, V, 2) | mask (N, M)"""
polygon_roll = np.roll(polygon, shift=1, axis=1)
polygon_side = (-1) ** ccw * (polygon - polygon_roll)[None]
vertex_to_point = polygon[None] - points[:, None, None]
mask = (np.cross(polygon_side, vertex_to_point) > 0).all(2)
return mask


def box3d_to_bev_corners(boxes):
"""
:boxes np.ndarray shape (N, 7)
:corners np.ndarray shape (N, 4, 2) (ccw)
"""
xy, _, wl, _, yaw = np.split(boxes, [2, 3, 5, 6], 1)
c, s = np.cos(yaw), np.sin(yaw)
R = np.stack([c, -s, s, c], -1).reshape(-1, 2, 2)
corners = 0.5 * np.r_[-1, -1, +1, -1, +1, +1, -1, +1]
corners = (wl[:, None] * corners.reshape(4, 2))
corners = np.einsum('ijk,imk->imj', R, corners) + xy[:, None]
return corners


class PointsInCuboids:
"""Takes ~10ms for each scene."""

def __init__(self, points):
self.points = points

def _height_threshold(self, boxes):
"""Filter to z slice."""
z1 = self.points[:, None, 2]
z2, h = boxes[:, [2, 5]].T
mask = (z1 > z2 - h / 2) & (z1 < z2 + h / 2)
return mask

def _get_mask(self, boxes):
polygons = box3d_to_bev_corners(boxes)
mask = self._height_threshold(boxes)
mask &= points_in_convex_polygon(
self.points[:, :2], polygons)
return mask

def __call__(self, boxes):
"""Return list of points in each box."""
mask = self._get_mask(boxes).T
points = list(map(self.points.__getitem__, mask))
return points


class PointsNotInRectangles(PointsInCuboids):

def _get_mask(self, boxes):
polygons = box3d_to_bev_corners(boxes)
mask = points_in_convex_polygon(
self.points[:, :2], polygons)
return mask

def __call__(self, boxes):
"""Return array of points not in any box."""
mask = ~self._get_mask(boxes).any(1)
return self.points[mask]
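
Aside (not part of the diff): a small sketch of the geometry helpers on a hand-made scene (one axis-aligned box, three test points), showing how the BEV rectangle test differs from the full cuboid test.

import numpy as np
from pvrcnn.core.geometry import (
    box3d_to_bev_corners, PointsInCuboids, PointsNotInRectangles)

# One box at the origin: (x, y, z, w, l, h, yaw).
boxes = np.array([[0.0, 0.0, 0.0, 2.0, 4.0, 1.5, 0.0]], dtype=np.float32)
points = np.array([
    [0.0, 0.0, 0.0],   # inside the cuboid
    [0.0, 0.0, 2.0],   # inside the BEV rectangle but above the box
    [5.0, 5.0, 0.0],   # outside
], dtype=np.float32)

corners = box3d_to_bev_corners(boxes)           # (1, 4, 2), counter-clockwise
in_cuboid = PointsInCuboids(points)(boxes)      # [array of shape (1, 3)]
outside = PointsNotInRectangles(points)(boxes)  # only the (5, 5, 0) point remains
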
38 changes: 14 additions & 24 deletions pvrcnn/core/proposal_targets.py
@@ -4,6 +4,7 @@

from pvrcnn.ops import box_iou_rotated, Matcher
from .anchor_generator import AnchorGenerator
from .box_encode import encode


class ProposalTargetAssigner(nn.Module):
@@ -34,34 +35,17 @@ def compute_iou(self, boxes, anchors):

def get_cls_targets(self, G_cls):
"""
1. Disable ambiguous (matched to multiple classes).
2. Replace ignore marker (-1) with binary mask.
Clamps the ignore marker (-1) to 0 and encodes ignores in a binary mask instead.
Note: an anchor may be matched to multiple classes.
"""
ambiguous = G_cls.eq(1).int().sum(0) > 1
G_cls[:, ambiguous] = -1
M_cls = G_cls.ne(-1)
G_cls = G_cls.clamp_(min=0)
return G_cls, M_cls

def _encode_diagonal(self, A_wlh):
A_wl, A_h = A_wlh.split([2, 1], -1)
A_norm = A_wl.norm(dim=-1, keepdim=True).expand(-1, 2)
A_norm = torch.cat((A_norm, A_h), -1)
return A_norm

def get_reg_targets(self, boxes, box_idx, G_cls):
"""Standard VoxelNet-style box encoding."""
M_reg = G_cls == 1
A = self.anchors[M_reg]
G = boxes[box_idx[M_reg]].cuda()
G_xyz, G_wlh, G_yaw = G.split([3, 3, 1], -1)
A_xyz, A_wlh, A_yaw = A.split([3, 3, 1], -1)
A_norm = self._encode_diagonal(A_wlh)
G_reg = torch.cat((
(G_xyz - A_xyz) / A_norm,
(G_wlh / A_wlh).log(),
(G_yaw - A_yaw) % math.pi), dim=-1
)
G_reg = encode(boxes[box_idx[M_reg]], self.anchors[M_reg])
M_reg = M_reg.unsqueeze(-1)
G_reg = torch.zeros_like(self.anchors).masked_scatter_(M_reg, G_reg)
return G_reg, M_reg
@@ -82,17 +66,23 @@ def apply_ignore_mask(self, matches, labels, box_ignore):

def match_all_classes(self, boxes, class_idx, box_ignore):
"""Match boxes to anchors based on IOU."""
full_idx = torch.arange(boxes.shape[0])
full_idx = torch.arange(boxes.shape[0], device=boxes.device)
classes = range(self.cfg.NUM_CLASSES)
matches, labels = zip(*[self.match_class_i(
boxes, class_idx, full_idx, i) for i in classes])
matches = torch.stack(matches).view(self.anchors.shape[:-1])
labels = torch.stack(labels).view(self.anchors.shape[:-1])
return matches, labels

def to_device(self, item):
"""Move items to anchors.device for fast rotated IOU."""
keys = ['boxes', 'class_idx', 'box_ignore']
items = [item[k].to(self.anchors.device) for k in keys]
return items

def forward(self, item):
box_idx, G_cls = self.match_all_classes(
item['boxes'].cuda(), item['class_idx'], item['box_ignore'])
boxes, class_idx, box_ignore = self.to_device(item)
box_idx, G_cls = self.match_all_classes(boxes, class_idx, box_ignore)
G_cls, M_cls = self.get_cls_targets(G_cls)
G_reg, M_reg = self.get_reg_targets(item['boxes'], box_idx, G_cls)
G_reg, M_reg = self.get_reg_targets(boxes, box_idx, G_cls)
item.update(dict(G_cls=G_cls, G_reg=G_reg, M_cls=M_cls, M_reg=M_reg))
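
Aside (not part of the diff): the item dict contract implied by forward above, sketched with hypothetical tensors; the assigner constructor and the exact dtypes of class_idx/box_ignore are assumptions, so those lines are left commented out.

import torch

# Two hypothetical ground-truth car boxes in (x, y, z, w, l, h, yaw) layout.
item = {
    'boxes': torch.tensor([[10.0,  2.0, -1.0, 1.6, 3.9, 1.56, 0.0],
                           [25.0, -4.0, -1.0, 1.6, 3.9, 1.56, 1.5]]),
    'class_idx': torch.tensor([0, 0]),           # both map to the Car anchor class
    'box_ignore': torch.tensor([False, False]),  # assumed boolean DontCare mask
}
# assigner = ProposalTargetAssigner(cfg)  # constructor args assumed
# assigner(item)
# forward() mutates item in place, adding G_cls/M_cls (per-anchor class
# targets and mask) and G_reg/M_reg (encoded box targets and mask).
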
55 changes: 54 additions & 1 deletion pvrcnn/dataset/augmentation.py
@@ -2,10 +2,17 @@
import os.path as osp
import numpy as np
import torch
from tqdm import tqdm
from collections import defaultdict
from itertools import compress

from .database_sampler import PointsNotInRectangles
from .kitti_utils import read_velo
from pvrcnn.ops import box_iou_rotated
from pvrcnn.core.geometry import (
points_in_convex_polygon,
PointsNotInRectangles,
PointsInCuboids,
)


class Augmentation:
@@ -107,6 +114,7 @@ def __call__(self, points, boxes):


class SampleAugmentation(Augmentation):
"""Pastes samples from database into scene."""

def __init__(self, cfg):
super(SampleAugmentation, self).__init__(cfg)
@@ -188,3 +196,48 @@ def __call__(self, points, boxes, class_idx):
points, boxes, class_idx = self.cat_samples(
samples, points, boxes, class_idx)
return points, boxes, class_idx


class DatabaseBuilder:
"""Builds cached database for SampleAugmentation."""

def __init__(self, cfg, annotations):
self.cfg = cfg
self.fpath = osp.join(cfg.DATA.CACHEDIR, 'database.pkl')
if osp.isfile(self.fpath):
print(f'Found cached database: {self.fpath}')
return
self._build(annotations)

def _build(self, annotations):
database = defaultdict(list)
for item in tqdm(annotations.values(), desc='Building database'):
for key, val in zip(*self._process_item(item)):
database[key] += [val]
self._save_database(dict(database))

def _demean(self, points, boxes):
"""Subtract box center (birds eye view)."""
_points, _boxes = [], []
for points_i, box_i in zip(points, boxes):
center, zwlhr = np.split(box_i, [2])
xy, zi = np.split(points_i, [2], 1)
_points += [np.concatenate((xy - center, zi), 1)]
_boxes += [np.concatenate((0 * center, zwlhr))]
return _points, _boxes

def _process_item(self, item):
"""Retrieve points in each box in scene."""
points = read_velo(item['velo_path'])
class_idx, boxes = item['class_idx'], item['boxes']
points = PointsInCuboids(points)(boxes)
keep = [len(p) > self.cfg.AUG.MIN_NUM_SAMPLE_PTS for p in points]
class_idx, points, boxes = [
compress(t, keep) for t in (class_idx, points, boxes)]
points, boxes = self._demean(points, boxes)
samples = [dict(points=p, box=b) for (p, b) in zip(points, boxes)]
return class_idx, samples

def _save_database(self, database):
with open(self.fpath, 'wb') as f:
pickle.dump(database, f)
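
Aside (not part of the diff): a sketch of building the sample database, assuming the usual cfg import path and a single annotated frame; the velodyne path and annotation fields are hypothetical and simply mirror what _process_item reads.

import numpy as np
from pvrcnn.core import cfg                       # import path assumed
from pvrcnn.dataset.augmentation import DatabaseBuilder

# Annotations keyed by sample id; each entry needs the fields used by
# _process_item: a velodyne path plus per-object class indices and boxes.
annotations = {
    '000000': dict(
        velo_path='data/kitti/training/velodyne/000000.bin',  # hypothetical path
        class_idx=np.array([0]),
        boxes=np.array([[10.0, 2.0, -1.0, 1.6, 3.9, 1.56, 0.0]], np.float32),
    ),
}

# Writes (or reuses) <cfg.DATA.CACHEDIR>/database.pkl, keyed by class index,
# with each entry holding the box's points demeaned in BEV and a zero-centered box.
DatabaseBuilder(cfg, annotations)
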
