-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
241 lines (159 loc) · 8.38 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
import os
import cv2
from Logger import Logger
import numpy as np
def order_points(pts):
# Initialize a list of coordinates that will be ordered
# such that the first entry in the list is the top-left,
# the second entry is the top-right, the third is the
# bottom-right, and the fourth is the bottom-left
rect = np.zeros((4, 2), dtype="float32")
# The top-left point will have the smallest sum, whereas
# the bottom-right point will have the largest sum
s = pts.sum(axis=1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]
# Now, compute the difference between the points, the
# top-right point will have the smallest difference,
# whereas the bottom-left will have the largest difference
diff = np.diff(pts, axis=1)
rect[1] = pts[np.argmin(diff)]
rect[3] = pts[np.argmax(diff)]
# Return the ordered coordinates
return rect
def correct_perspective(mask_img_path, real_img_path, logger, aspect_ratio=2.35, padding=50):
print('Correcting perspective for image: ' + real_img_path)
logger.info(f'Correcting perspective for image: {real_img_path}\n')
# Load the mask image
mask_img = cv2.imread(mask_img_path, cv2.IMREAD_COLOR)
# Convert the mask image to grayscale
mask_img_gray = cv2.cvtColor(mask_img, cv2.COLOR_BGR2GRAY)
# Threshold the mask image to get a binary image
_, mask_img_bin = cv2.threshold(mask_img_gray, 127, 255, cv2.THRESH_BINARY)
# Define the kernel size
kernel_size = 5
# Define the kernel
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
# Apply morphological opening to the binary mask image
mask_img_opened = cv2.morphologyEx(mask_img_bin, cv2.MORPH_OPEN, kernel)
# Find all connected components in the binary mask image
num_labels, labels_im = cv2.connectedComponents(mask_img_opened)
#log the number of components
logger.info(f'Number of components: {num_labels}')
#print each component
for i in range(num_labels):
logger.info(f'Component {i}: {len(labels_im[labels_im == i])}')
# Compute the area of each component
area = [len(labels_im[labels_im == i]) for i in range(num_labels)]
#log the area of each component
logger.info(f'\nArea of each component: {area}')
# Find the index of the largest component (ignore the background, i.e., index 0)
largest_comp_idx = area[1:].index(max(area[1:])) + 1 # +1 due to background
#log the largest component
logger.info(f'Largest component: {largest_comp_idx}\n')
# Keep only the largest component in the mask image
largest_comp = (labels_im == largest_comp_idx).astype('uint8') * 255
# Find the contours in the binary mask image
contours, _ = cv2.findContours(largest_comp, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#log the number of contours
logger.info(f'\nNumber of contours: {len(contours)}')
#print each contour
for i in range(len(contours)):
logger.info(f'Contour {i}: {len(contours[i])} points, area: {cv2.contourArea(contours[i])}')
# Among all contours, find the one with the maximum area
cnt = max(contours, key=cv2.contourArea)
#log the largest contour
logger.info(f'Largest contour: {len(cnt)} points, area: {cv2.contourArea(cnt)}\n')
# Find the minimum area rectangle that encloses the contour
rect = cv2.minAreaRect(cnt)
box = cv2.boxPoints(rect)
box = order_points(box) # ensure the points are in the correct order
#log the rectangle
logger.info(f'\nRectangle: {rect}\n')
#log the bounding box
logger.info(f'\nBounding box: \n{box}\n')
# Define the dimensions of the output image
output_width = 500 + 2 * padding # add padding to the width
output_height = round(output_width * aspect_ratio) # adjust the height according to the aspect ratio
output_size = (output_width, output_height)
#log the output width, height and size
logger.info(f'Output width: {output_width}\n Output height: {output_height}\n Output size: {output_size}\n')
# The corners of the rectangle in the output image (adjusted for padding)
dst_pts = np.array([[padding, padding], # top-left corner (subtracted padding)
[output_size[0]-padding-1, padding], # top-right corner (subtracted padding)
[output_size[0]-padding-1, output_size[1]-padding-1], # bottom-right corner (added padding)
[padding, output_size[1]-padding-1]], dtype='float32') # bottom-left corner (added padding)
#log the destination points
logger.info(f'Destination points: \n{dst_pts}\n')
# Compute the perspective transformation matrix
M = cv2.getPerspectiveTransform(box, dst_pts)
#log the perspective transformation matrix
logger.info(f'Perspective transformation matrix: \n{M}\n')
# Load the real image
real_img = cv2.imread(real_img_path, cv2.IMREAD_COLOR)
real_img_name = os.path.basename(real_img_path)
# Illustation code
# Illustration image
illustration_img = real_img.copy()
# Draw the rectangle on the real image for illustration
cv2.drawContours(illustration_img, [box.astype("int")], -1, (0, 255, 0), 3)
# draw the mask on the real image for illustration
cv2.drawContours(illustration_img, [cnt.astype("int")], -1, (0, 0, 255), 3)
# warp the illustration image
illustration_img = cv2.warpPerspective(illustration_img, M, output_size
# Save the illustration image
os.makedirs('output/rect', exist_ok=True)
cv2.imwrite(os.path.join('output/rect', 'Illustration_' + real_img_name), illustration_img)
print('Illustration image saved to: ' + os.path.join('output/rect', 'Illustration_' + real_img_name))
# DONE illustration code
# Apply the perspective transformation to the real image
img_warped = cv2.warpPerspective(real_img, M, output_size)
#log the real image
logger.info(f'Real image: {real_img.shape}')
#log the warped image
logger.info(f'Warped image: {img_warped.shape}')
return img_warped
def main():
# Step 1: Prompt user if okay to delete existing output directory
if os.path.exists('output'):
response = input("Output directory already exists. Do you want to delete it? (y/n): ")
if response != 'n':
os.system('rm -rf output')
else:
print("You decided not to delete the output directory. Continue at your own risk.")
#wait for user to press enter
input("Press Enter to continue...")
# Step 2: Create output directories
if not os.path.exists('output'):
os.makedirs('output')
os.makedirs('output/photos')
os.makedirs('output/logs')
# Step 3: Initialize the logger
logger = Logger('output.log', 'output').get_logger()
# Step 4: Get the list of the mask image paths
mask_image_paths = os.listdir('input/segmentations')
logger.info("Mask image paths: " + str(mask_image_paths))
#Print to log
logger.info("Processing " + str(len(mask_image_paths)) + " images")
for mask_img_name in mask_image_paths:
logger.info("Processing mask: " + mask_img_name)
try:
# Construct paths
mask_img_path = os.path.join('input/segmentations', mask_img_name)
real_img_name = mask_img_name.split('.')[0] + '.jpg'
real_img_path = os.path.join('input/photos', real_img_name)
# Initialize the logger for the current image
individual_logger = Logger(f'logs/{real_img_name.split(".")[0]}.log', 'output').get_logger()
logger.info(f'Correcting image: {real_img_name}')
# Step 5: Call correct_perspective() and save the result
img_warped = correct_perspective(mask_img_path, real_img_path, individual_logger, aspect_ratio=2.35, padding=50)
cv2.imwrite(os.path.join('output', 'photos', real_img_name), img_warped)
individual_logger.info(f'Successfully processed and saved image: {real_img_name}')
except Exception as e:
# Log the error if an exception is thrown
individual_logger = Logger(f'logs/{real_img_name.split(".")[0]}.log', 'output').get_logger()
individual_logger.error(f'Error processing image: {real_img_name}, error: {str(e)}')
# Step 6: Log the completion of all operations
logger.info('All images processed.')
if __name__ == "__main__":
main()