test_twoheadgraspnet.py
import numpy as np
from collections import deque

import torch

from env import VrepEnvironment
from models import TwoHeadGraspNetwork
class DQN:
    def __init__(self, model_path, env, lr, batch_size, gamma, eps_decay,
                 eps_start, eps_end, initial_memory, memory_size):
        self.env = env
        self.model_path = model_path
        self.lr = lr
        self.gamma = gamma
        self.eps_decay = eps_decay
        self.eps_start = eps_start
        self.eps_end = eps_end
        self.initial_memory = initial_memory
        self.replay_buffer = deque(maxlen=memory_size)
        self.batch_size = batch_size
        self.num_actions = 112 * 112 * 16
        self.num_action_orientations = 16
        self.num_action_positions = 112
        self.input_shape = [224, 224]
        self.model = TwoHeadGraspNetwork()
        # map_location keeps evaluation runnable on CPU-only machines even if
        # the checkpoint was saved from a GPU.
        self.model.load_state_dict(torch.load(self.model_path, map_location="cpu"))
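        # Note: the learning-rate, gamma, epsilon, and replay-buffer settings
        # above are unused during evaluation; they are presumably kept so this
        # class matches the training-time constructor signature.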
    def agent_policy(self, state):
        input_image, input_depth = self.transform_data(state)
        # Greedy policy: pick the highest-Q orientation and location.
        with torch.no_grad():
            q_value_orient, q_value_loc = self.model(torch.from_numpy(input_image),
                                                     torch.from_numpy(input_depth))
        action_orientation_idx = np.argmax(q_value_orient.numpy())
        action_location_idx = np.argmax(q_value_loc.numpy())
        action = [action_location_idx, action_orientation_idx]
        print(action)
        return action
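    # NOTE (assumption): the environment appears to consume the flat location
    # index directly; if pixel coordinates were needed instead, the index could
    # be decoded against the 112x112 action grid, e.g.
    #   y, x = divmod(action_location_idx, self.num_action_positions)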
    def transform_data(self, state):
        color_heightmap, valid_depth_heightmap, _ = state
        # The 2x zoom (ndimage.zoom) is disabled; the heightmaps are used at
        # their native resolution.
        color_heightmap_2x = color_heightmap
        depth_heightmap_2x = valid_depth_heightmap

        # Normalize the color image with ImageNet mean/std.
        image_mean = [0.485, 0.456, 0.406]
        image_std = [0.229, 0.224, 0.225]
        input_color_image = color_heightmap_2x.astype(float) / 255
        for c in range(3):
            input_color_image[:, :, c] = (input_color_image[:, :, c] - image_mean[c]) / image_std[c]

        # Pre-process the depth image: replicate to three channels and normalize.
        image_mean = [0.01, 0.01, 0.01]
        image_std = [0.03, 0.03, 0.03]
        # reshape (rather than assigning to .shape) avoids mutating the caller's
        # depth heightmap in place.
        depth_heightmap_2x = depth_heightmap_2x.reshape(
            depth_heightmap_2x.shape[0], depth_heightmap_2x.shape[1], 1)
        input_depth_image = np.concatenate(
            (depth_heightmap_2x, depth_heightmap_2x, depth_heightmap_2x), axis=2)
        for c in range(3):
            input_depth_image[:, :, c] = (input_depth_image[:, :, c] - image_mean[c]) / image_std[c]

        # Add a batch dimension and convert HWC -> NCHW float32.
        input_image = np.transpose(
            np.expand_dims(input_color_image.astype(np.float32), axis=0), (0, 3, 1, 2))
        input_depth = np.transpose(
            np.expand_dims(input_depth_image.astype(np.float32), axis=0), (0, 3, 1, 2))
        return input_image, input_depth
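    # Both arrays returned by transform_data are batch-of-one NCHW float32
    # inputs; with 224x224 heightmaps their shape is (1, 3, 224, 224).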
    def test(self, num_episodes):
        self.model.eval()
        for episode in range(num_episodes):
            state = self.env.reset()
            reward_for_episode = 0
            while True:
                received_action = self.agent_policy(state)
                next_state, reward, terminal = self.env.twohead_grasp_step(
                    received_action, state[1], state[2])
                # Accumulate the episode return.
                reward_for_episode += reward
                state = next_state
                if terminal:
                    print("Episode: {} done, Reward: {}".format(episode, reward_for_episode))
                    break
        self.env.close()
if __name__ == "__main__":
    env = VrepEnvironment(is_testing=False)

    # Hyperparameters.
    lr = 0.0001
    batch_size = 8
    eps_decay = 30000
    eps_start = 0.5
    eps_end = 0.1
    initial_memory = 500
    memory_size = 1500  # 20 * initial_memory
    gamma = 0.5  # discount factor
    num_episodes = 600
    model_path = "weights/steps_10401.pth"

    print('Start evaluating')
    model = DQN(model_path, env, lr, batch_size, gamma, eps_decay, eps_start,
                eps_end, initial_memory, memory_size)
    model.test(num_episodes)
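# To run: `python test_twoheadgraspnet.py` with a V-REP/CoppeliaSim scene that
# VrepEnvironment can connect to, and the trained checkpoint available at
# weights/steps_10401.pth (the simulator setup is an assumption based on this
# repository's layout).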