-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
147 lines (138 loc) · 5.18 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import argparse
from typing import List
import matplotlib.pyplot as plt
import numpy as np
def parse_args() -> argparse.Namespace:
    """
    Builds the command line interface of the flappy bird agent and parses the arguments given to the program.

    The options are declared in a table and registered in that order, which is also the order in which they
    appear in the generated help message.
    """
    # each entry: (flag, keyword arguments forwarded to ``add_argument``)
    option_table = [
        (
            "--verbose",
            {"action": "store_true", "help": "Adds verbosity on each step"},
        ),
        (
            "--enable_inner_progress_bar",
            {
                "action": "store_true",
                "help": "Displays progress bars within each episode",
            },
        ),
        (
            "--run_cross_val",
            {
                "action": "store_true",
                "help": "Runs a cross validation on the parameters (alpha, beta) instead of performing a regular run.",
            },
        ),
        (
            "--n_experiments",
            {
                "type": int,
                "default": 10,
                "help": "Number of experiments to average on",
            },
        ),
        (
            "--max_steps",
            {
                "type": int,
                "default": 10000,
                "help": "Maximum number of steps in an episode",
            },
        ),
        (
            "--min_tree_depth",
            {
                "type": int,
                "default": 20,
                "help": "Minimal tree depth. Strongly affects the score function computed using the 'exact' heuristic.",
            },
        ),
        (
            "--alpha",
            {
                "type": float,
                "default": 0.0,
                "help": "Strength of the regularization towards y close to 0.5 and vy close to 0.",
            },
        ),
        (
            "--beta",
            {
                "type": float,
                "default": 0.3,
                "help": "Factor between 0 and 1 that balances between the two regularizing factors.",
            },
        ),
        (
            "--heuristic",
            {
                "type": str,
                "default": "convex",
                "choices": ["convex", "geometric", "exact"],
                "help": "Parameter that selects the heuristic used to guide the bird towards the center with a small velocity.",
            },
        ),
    ]

    cli = argparse.ArgumentParser(description="Agent that plays flappy bird")
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    # reads the arguments from sys.argv
    return cli.parse_args()
def _stats_line(label: str, mean: float, samples: np.ndarray) -> str:
    """
    Formats one summary line "<label>: <mean> +/- <1.96 * std> [<min>, <max>]" followed by a newline.

    The spread is 1.96 times the standard deviation of the samples themselves (numpy's default ``std``),
    not the standard error of the mean.
    """
    return (
        f"{label}: {mean:.2f} "
        f"+/- {1.96 * samples.std():.2f} "
        f"[{samples.min():.2f}, {samples.max():.2f}]\n"
    )


def display_results(
    rewards: np.ndarray,
    n_steps: np.ndarray,
    alpha_list: List[float],
    beta_list: List[float],
    max_steps: int,
) -> None:
    """
    Displays the results obtained through the cross-validation experiment on alpha and beta.
    Operates in three steps: prints the results obtained for each set of parameters, displays them in a matrix and
    gives the best parameters.

    :param rewards: rewards indexed as [alpha index, beta index, experiment].
    :param n_steps: number of steps indexed as [alpha index, beta index, experiment].
    :param alpha_list: values of alpha, in the order of the first axis of the arrays.
    :param beta_list: values of beta, in the order of the second axis of the arrays.
    :param max_steps: an experiment counts as a success when its number of steps equals this value.
    """
    # statistics over the experiments (last axis)
    mean_rewards = rewards.mean(axis=-1)
    mean_n_steps = n_steps.mean(axis=-1)
    success_rates = (n_steps == max_steps).mean(axis=-1)

    # per-parameter report
    for alpha_idx, alpha in enumerate(alpha_list):
        for beta_idx, beta in enumerate(beta_list):
            print(
                f"Parameters - alpha: {alpha}, beta: {beta}: success rate: {success_rates[alpha_idx, beta_idx]:.2f}\n"
                + _stats_line(
                    "Rewards",
                    mean_rewards[alpha_idx, beta_idx],
                    rewards[alpha_idx, beta_idx, :],
                )
                + _stats_line(
                    "Number of steps",
                    mean_n_steps[alpha_idx, beta_idx],
                    n_steps[alpha_idx, beta_idx, :],
                )
            )

    # matrix plot: one panel per statistic, alpha on the vertical axis and beta on the horizontal one
    fig, axes = plt.subplots(1, 3, figsize=(8 * len(beta_list), 2 * len(alpha_list)))
    # noinspection PyUnresolvedReferences
    colormap = plt.cm.Oranges
    axes[0].matshow(mean_rewards, cmap=colormap)
    axes[1].matshow(mean_n_steps, cmap=colormap)
    axes[2].matshow(success_rates, cmap=colormap)

    # write the value of each cell on top of it
    for alpha_idx, beta_idx in np.ndindex(*mean_n_steps.shape):
        # text() takes (x, y) while the matrices are indexed [row, column]: the column index (beta) is x
        axes[0].text(
            beta_idx,
            alpha_idx,
            f"{mean_rewards[alpha_idx, beta_idx]:.2f}",
            va="center",
            ha="center",
        )
        axes[1].text(
            beta_idx,
            alpha_idx,
            f"{mean_n_steps[alpha_idx, beta_idx]:.2f}",
            va="center",
            ha="center",
        )
        axes[2].text(
            beta_idx,
            alpha_idx,
            f"{success_rates[alpha_idx, beta_idx] * 100:.2f}%",
            va="center",
            ha="center",
        )

    for ax in axes:
        ax.set(ylabel=r"$\alpha$", xlabel=r"$\beta$")
        # labels are passed by keyword: on older matplotlib the second positional parameter is ``minor``
        ax.set_xticks(range(len(beta_list)), labels=beta_list)
        ax.set_yticks(range(len(alpha_list)), labels=alpha_list)

    fig.suptitle(
        f"{n_steps.shape[-1]} experiments with at most {max_steps} steps.",
    )
    axes[0].set(title="Mean reward")
    axes[1].set(title="Mean number of steps")
    axes[2].set(title="Mean success rates")
    plt.show()

    # best parameters: cell with the highest mean reward
    best_alpha, best_beta = np.unravel_index(mean_rewards.argmax(), mean_rewards.shape)
    best_n_steps = n_steps[best_alpha, best_beta, :]
    print(
        f"Best parameters (in terms of rewards) - alpha: {alpha_list[best_alpha]}, beta: {beta_list[best_beta]}\n"
        + _stats_line("Number of steps", best_n_steps.mean(), best_n_steps)
    )