-
Notifications
You must be signed in to change notification settings - Fork 2
/
Anchor_k-means.py
176 lines (152 loc) · 7.73 KB
/
Anchor_k-means.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# -*- coding: utf-8 -*-k-means方法求锚框尺寸。
"""Created on Wed Aug 21 11:32:40 2019 @author: FanXudong"""
from os.path import join
import argparse
import numpy as np
import sys
import os
import random, pdb
from keras_frcnn.coco import COCO
def IOU(x, centroids):
'''
:param x: 某一个ground truth的w,h
:param centroids: anchor的w,h的集合[(w,h),(),...],共k个
:return: 单个ground truth box与所有k个anchor box的IoU值集合
'''
IoUs = []
w, h = x # ground truth的w,h
for centroid in centroids:
c_w, c_h = centroid # anchor的w,h
if c_w >= w and c_h >= h: # anchor包围ground truth
iou = w * h / (c_w * c_h)
elif c_w >= w and c_h <= h: # anchor宽矮
iou = w * c_h / (w * h + (c_w - w) * c_h)
elif c_w <= w and c_h >= h: # anchor瘦长
iou = c_w * h / (w * h + c_w * (c_h - h))
else: # ground truth包围anchor means both w,h are bigger than c_w and c_h respectively
iou = (c_w * c_h) / (w * h)
IoUs.append(iou) # will become (k,) shape
return np.array(IoUs)
def avg_IOU(X, centroids):
'''
:param X: ground truth的w,h的集合[(w,h),(),...]
:param centroids: anchor的w,h的集合[(w,h),(),...],共k个
'''
n, d = X.shape
sum = 0.
for i in range(X.shape[0]):
sum += max(IOU(X[i], centroids)) # 返回一个ground truth与所有anchor的IoU中的最大值
return sum / n # 对所有ground truth求平均
def write_anchors_to_file(centroids, X, anchor_file, input_shape, yolo_version,num_Anchors=0):
'''
:param centroids: anchor的w,h的集合[(w,h),(),...],共k个
:param X: ground truth的w,h的集合[(w,h),(),...]
:param anchor_file: anchor和平均IoU的输出路径
'''
f = open(anchor_file, 'w')
anchors = centroids.copy()
print(anchors.shape)
if yolo_version == 'yolov2':
for i in range(anchors.shape[0]):
# yolo中对图片的缩放倍数为32倍,所以这里除以32,
# 如果网络架构有改变,根据实际的缩放倍数来
# 求出anchor相对于缩放32倍以后的特征图的实际大小(yolov2)
anchors[i][0] *= input_shape / 32.
anchors[i][1] *= input_shape / 32.
elif yolo_version == 'yolov3':
for i in range(anchors.shape[0]):
# 求出yolov3相对于原图的实际大小
anchors[i][0] *= input_shape
anchors[i][1] *= input_shape
else:
print("the yolo version is not right!")
exit(-1)
widths = anchors[:, 0]
sorted_indices = np.argsort(widths)
print('Anchors = ', anchors[sorted_indices])
for i in sorted_indices[:-1]:
f.write('%0.2f,%0.2f, ' % (anchors[i, 0], anchors[i, 1]))
# there should not be comma after last anchor, that's why
f.write('%0.2f,%0.2f\n' % (anchors[sorted_indices[-1:], 0], anchors[sorted_indices[-1:], 1]))
f.write('avg IoU: %f\n\n' % (avg_IOU(X, centroids)))
f.write('Anchors_FRCNN_NIRPed =[')
for i in sorted_indices:
f.write('[%d,%d], ' % (anchors[i, 0]*640/256, float(anchors[i, 1])))#TODO:对Ours训练集标记统计结果
f.write('] # num_Anchors={}'.format(num_Anchors))
print()
def k_means(X, centroids, eps, anchor_file, input_shape, yolo_version):
N = X.shape[0] # ground truth的个数
iterations = 200
print("centroids.shape", centroids)
k, dim = centroids.shape # anchor的个数k以及w,h两维,dim默认等于2
prev_assignments = np.ones(N) * (-1) # 对每个ground truth分配初始标签
iter = 0
old_D = np.zeros((N, k)) # 初始化每个ground truth对每个anchor的IoU
while iter < iterations:
D = []
iter += 1
for i in range(N):
d = 1 - IOU(X[i], centroids)
D.append(d)
D = np.array(D) # D.shape = (N,k) 得到每个ground truth对每个anchor的IoU
print("iter {}: dists = {}".format(iter, np.sum(np.abs(old_D - D)))) # 计算每次迭代和前一次IoU的变化值
# assign samples to centroids
assignments = np.argmin(D, axis=1) # 将每个ground truth分配给距离d最小的anchor序号
if (assignments == prev_assignments).all(): # 如果前一次分配的结果和这次的结果相同,就输出anchor以及平均IoU
print("Centroids = ", centroids)
write_anchors_to_file(centroids, X, anchor_file, input_shape, yolo_version,N)
return
# calculate new centroids
centroid_sums = np.zeros((k, dim), np.float64) # 初始化以便对每个簇的w,h求和
for i in range(N):
centroid_sums[assignments[i]] += X[i] # 将每个簇中的ground truth的w和h分别累加
for j in range(k): # 对簇中的w,h求平均
centroids[j] = centroid_sums[j] / (np.sum(assignments == j) + 1)
prev_assignments = assignments.copy()
old_D = D.copy()
def main(argv):
parser = argparse.ArgumentParser()
parser.add_argument('-annFile', default=r'.\data\train.json', help='path to coco annFile\n') # 这个文件是由运行scripts文件夹中的
# python voc_label.py
# 目前yolo打标签可以使用labelimg中的yolo格式
parser.add_argument('-output_dir', default=r'.\anchors', type=str, help='Output anchor directory\n')
parser.add_argument('-num_clusters', default=5, type=int, help='number of clusters\n')
# parser.add_argument('-num_clusters', default=9, type=int, help='number of clusters\n')
'''
需要注意的是yolov2输出的值比较小是相对特征图来说的, yolov3输出值较大是相对原图来说的, 所以yolov2和yolov3的输出是有区别的
'''
parser.add_argument('-yolo_version', default='yolov3', type=str, help='yolov2 or yolov3\n')
parser.add_argument('-yolo_input_shape', default=8*32, type=int, help='input images shape,multiples of 32. etc. 352*352\n')
args = parser.parse_args()
annotation_dims = []
if not os.path.exists(args.output_dir):
os.mkdir(args.output_dir)
cocoGt = COCO(args.annFile)
imgIds = sorted(cocoGt.getImgIds()) #imgIds=[7000000, 7000001, 7000002, 7000003, 7000004, 7000005,...]
for i in range(len(imgIds)):
image = cocoGt.loadImgs(ids=imgIds[i])[0]
cols, rows = image['width'], image['height'] # TODO:NightOwls图像尺寸: cols, rows = 1920, 1024
anno_ids = cocoGt.getAnnIds(imgIds=imgIds[i]) # anno_ids= [7000000, 7000001]
annos = cocoGt.loadAnns(ids=anno_ids)
if anno_ids == []:
continue
for anno in annos:
cat = cocoGt.loadCats(ids=anno['category_id'])[0] # cat={'name': 'pedestrian', 'id': 1}
class_name = cat['name']
if class_name not in ['Pedestrian', 'pedestrian', 'Ped', 'ped']:
continue
if anno['Dis'] > 80:
continue
bbox = anno['bbox']
annotation_dims.append([float(bbox[2]/cols), float(bbox[3]/rows)])
num_annos = len(annotation_dims)
print('#annos=%d' % num_annos)
annotation_dims = np.array(annotation_dims) # 保存所有ground truth框的(w,h)
eps = 0.005
anchor_file = join(args.output_dir, '%s_%danchors%d.txt' % (args.yolo_version, args.num_clusters, args.yolo_input_shape))
indices = [random.randrange(annotation_dims.shape[0]) for i in range(args.num_clusters)]
centroids = annotation_dims[indices]
k_means(annotation_dims, centroids, eps, anchor_file, args.yolo_input_shape, args.yolo_version)
print('centroids.shape', centroids.shape)
if __name__ == "__main__":
main(sys.argv)