DQNetwork.py
from keras.models import Sequential
from keras.layers import *
from keras.optimizers import *
import numpy as np


class DQNetwork:
    def __init__(self, actions, input_shape, alpha=0.1, gamma=0.99, dropout_prob=0.1, load_path='', logger=None):
        self.model = Sequential()
        self.actions = actions  # Size of the network output
        self.gamma = gamma
        self.alpha = alpha
        self.dropout_prob = dropout_prob

        # Define the neural network (Keras 1.x API)
        self.model.add(BatchNormalization(axis=1, input_shape=input_shape))
        self.model.add(Convolution2D(32, 2, 2, border_mode='valid', subsample=(2, 2)))
        self.model.add(Activation('relu'))

        self.model.add(BatchNormalization(axis=1))
        self.model.add(Convolution2D(64, 2, 2, border_mode='valid', subsample=(2, 2), dim_ordering="th"))
        self.model.add(Activation('relu'))

        self.model.add(BatchNormalization(axis=1))
        self.model.add(Convolution2D(64, 3, 3, border_mode='valid', subsample=(2, 2)))
        self.model.add(Activation('relu'))

        self.model.add(BatchNormalization(axis=1))
        self.model.add(Convolution2D(64, 2, 2, border_mode='valid', subsample=(2, 2)))
        self.model.add(Activation('relu'))
        self.model.add(Flatten())

        self.model.add(BatchNormalization(mode=1))
        self.model.add(Dropout(self.dropout_prob))
        self.model.add(Dense(1024))
        self.model.add(Activation('relu'))

        self.model.add(BatchNormalization(mode=1))
        self.model.add(Dropout(self.dropout_prob))
        self.model.add(Dense(512))
        self.model.add(Activation('relu'))

        self.model.add(BatchNormalization(mode=1))
        self.model.add(Dropout(self.dropout_prob))
        self.model.add(Dense(256))
        self.model.add(Activation('relu'))

        self.model.add(Dense(self.actions))  # One output (Q-value) per action

        self.optimizer = Adam()
        self.logger = logger

        # Load the network weights from a saved model
        if load_path != '':
            self.load(load_path)

        self.model.compile(loss='mean_squared_error', optimizer=self.optimizer, metrics=['accuracy'])
    def train(self, batch):
        # Build the inputs and targets for the given batch, then fit the model on them
        x_train = []
        t_train = []

        # Generate the training set and targets
        for datapoint in batch:
            x_train.append(datapoint['source'])

            # Get the current Q-values for the next state and select the best one
            next_state_pred = list(self.predict(datapoint['dest']).squeeze())
            next_a_Q_value = np.max(next_state_pred)

            # Set the target so that the error is 0 on all actions except the one taken,
            # using the Q-learning target r + gamma * max_a' Q(s', a') (just r for final states)
            t = list(self.predict(datapoint['source'])[0])
            t[datapoint['action']] = (datapoint['reward'] + self.gamma * next_a_Q_value) if not datapoint['final'] else datapoint['reward']

            t_train.append(t)

        print(next_state_pred)  # Print a prediction to get an idea of the Q-values' magnitude

        x_train = np.asarray(x_train).squeeze()
        t_train = np.asarray(t_train).squeeze()

        history = self.model.fit(x_train, t_train, batch_size=32, nb_epoch=1)
        self.logger.to_csv('loss_history.csv', history.history['loss'])
    def predict(self, state):
        # Feed the state into the model, return the predicted Q-values
        return self.model.predict(state, batch_size=1)
    def save(self, filename=None):
        # Save the model weights to disk
        print('Saving...')
        self.model.save_weights(self.logger.path + ('model.h5' if filename is None else filename))
    def load(self, path):
        # Load the model weights from the given path
        print('Loading...')
        self.model.load_weights(path)
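

# ----------------------------------------------------------------------------
# Minimal usage sketch (not part of the original file). It assumes Keras 1.x
# with Theano image_dim_ordering, a channels-first stack of 4 84x84 frames as
# the input shape, and an arbitrary action count of 6; only predict() is
# exercised here, since train() and save() expect a logger object.
# ----------------------------------------------------------------------------
if __name__ == '__main__':
    input_shape = (4, 84, 84)  # assumed frame-stack shape (channels, height, width)
    network = DQNetwork(actions=6, input_shape=input_shape)

    # Random state with a leading batch dimension of 1, just to probe the output
    state = np.random.random((1,) + input_shape).astype('float32')
    q_values = network.predict(state)
    print(q_values)  # one Q-value per action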