-
Notifications
You must be signed in to change notification settings - Fork 0
/
environment.py
executable file
·253 lines (218 loc) · 10 KB
/
environment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
"""
Environment to evaluate a neural network during its lifetime
__author__ = "Joe Sarsfield"
__email__ = "joe.sarsfield@gmail.com"
"""
import numpy as np
from random import randrange
from time import sleep
from feature_dimensions import PerformanceDimension, PhenotypicDimension, GenomicDimension
import math
import sys
import random
def get_env_spaces(gym_env_string):
    """Get the observation and action space sizes of a gym environment.

    A throw-away environment is created only to inspect its spaces and is
    closed again before returning.

    Args:
        gym_env_string: environment id passed to ``gym.make``.

    Returns:
        Tuple ``(observation_size, action_size)``. ``observation_size`` is the
        first entry of the observation space shape. ``action_size`` is 1 when
        the action space type name contains "Discrete", otherwise the first
        entry of the action space shape.
    """
    import gym

    temp_env = gym.make(gym_env_string)
    try:
        observation_size = temp_env.observation_space.shape[0]
        action_space = temp_env.action_space
        if "Discrete" in str(type(action_space)):
            action_size = 1
        else:
            action_size = action_space.shape[0]
    finally:
        # The environment is only needed for its metadata; release it so
        # repeated calls do not accumulate open environments.
        temp_env.close()
    return observation_size, action_size
class Environment:
    """Base class for all environments.

    Stores the feature dimensions handed in by the caller and sorts them into
    performance, phenotypic and genomic groups so each group can be invoked
    on its own.
    """

    def __init__(self, feature_dims=None):
        """Store ``feature_dims`` and split them by kind.

        Args:
            feature_dims: list of feature dimension objects. When omitted, a
                new empty list is created for this instance.
        """
        # A literal ``[]`` default would be one list shared by every instance
        # constructed without arguments; build a fresh one instead.
        self.feature_dims = [] if feature_dims is None else feature_dims
        self.setup_feature_dimensions()

    def setup_feature_dimensions(self):
        """Rebuild the per-kind lists from ``self.feature_dims``.

        Dimensions that are not an instance of any of the three known kinds
        are ignored.
        """
        self.performance_dims = []
        self.phenotypic_dims = []
        self.genomic_dims = []
        for dim in self.feature_dims:
            if isinstance(dim, PerformanceDimension):
                self.performance_dims.append(dim)
            elif isinstance(dim, PhenotypicDimension):
                self.phenotypic_dims.append(dim)
            elif isinstance(dim, GenomicDimension):
                self.genomic_dims.append(dim)

    def calc_performance_dims(self, *args):
        """Invoke ``call(*args)`` on every performance dimension."""
        for dim in self.performance_dims:
            dim.call(*args)

    def calc_phenotypic_dims(self, *args):
        """Invoke ``call(*args)`` on every phenotypic dimension."""
        for dim in self.phenotypic_dims:
            dim.call(*args)

    def calc_genomic_dims(self, *args):
        """Invoke ``call(*args)`` on every genomic dimension."""
        for dim in self.genomic_dims:
            dim.call(*args)
class EnvironmentReinforcement(Environment):
    """Reinforcement environments driven through a gym environment."""

    def __init__(self, gym_env_string, parallel=True, trials=1, steps=900, feature_dims=None):
        """Configure the evaluation.

        Args:
            gym_env_string: environment id passed to ``gym.make`` on each
                evaluation.
            parallel: stored on the instance; not read inside this class.
            trials: number of trials run per evaluation (when not rendering).
            steps: maximum number of environment steps per trial.
            feature_dims: list of feature dimension objects; a new empty list
                is used when omitted.
        """
        # Never hand a shared default list to the base class.
        super().__init__([] if feature_dims is None else feature_dims)
        self.net = None  # Neural network to evaluate
        self.trials = trials  # Fitness = best (max) reward over all trials
        self.steps = steps  # Upper bound on environment steps per trial
        self.gym_env_string = gym_env_string
        self.env = None
        self.parallel = parallel

    def evaluate(self, net, render=False):
        """Evaluate the neural net and return the final fitness.

        The fitness is the highest total reward obtained in any single trial.
        When ``render`` is truthy the environment is drawn each step and the
        keyboard is polled: 'q' stops the evaluation, 'r' ends the current
        trial early.

        Args:
            net: network exposing ``is_void``, ``graph``, ``set_fitness`` and
                ``genome``.
            render: whether to render the environment and poll the keyboard.

        Returns:
            -9999 for void networks, ``None`` when the user quits with 'q'
            while rendering, otherwise the best trial reward.
        """
        if net.is_void:
            return -9999  # return low fitness for void networks
        if render:
            import keyboard
        import gym

        self.net = net
        self.env = gym.make(self.gym_env_string)
        trial_rewards = []
        try:
            for _trial in range(self.trials if render is False else 999):
                observation = self.env.reset()
                action = self.net.graph(observation.astype(np.float32))
                trial_reward = 0
                for _step in range(self.steps):
                    if render:
                        if keyboard.is_pressed('q'):
                            # Abort without assigning a fitness; the
                            # environment is closed by the finally clause.
                            return None
                        elif keyboard.is_pressed('r'):
                            break
                        else:
                            self.env.render()
                    observation, reward, done, info = self.env.step(action)
                    trial_reward += reward
                    if done:
                        break
                    action = self.net.graph(observation.astype(np.float32))
                trial_rewards.append(trial_reward)
        finally:
            if render:
                # Best effort while rendering: closing may fail, and that
                # must not discard the evaluation.
                try:
                    self.env.close()
                except Exception:
                    print("FAILED to close env during render. Class EnvironmentReinforcement Def evaluate")
            else:
                # Release the environment after every evaluation so repeated
                # evaluations do not accumulate open environments.
                self.env.close()

        best_reward = np.asarray(trial_rewards, dtype=np.float64).max()
        self.net.set_fitness(best_reward)
        self.calc_performance_dims(self.net)
        self.calc_phenotypic_dims(self.net)
        net.genome.performance_dims = self.performance_dims
        net.genome.phenotypic_dims = self.phenotypic_dims
        return best_reward
class EnvironmentReinforcementCustom(Environment):
    """Reinforcement environment for a custom (non-gym) game."""

    def __init__(self, env_class, trials=10000):
        """Instantiate ``env_class(trials)`` and remember the round budget."""
        super().__init__()
        self.net = None  # Neural network to evaluate
        self.opponents = []  # List of opponent nets
        self.trials = trials
        self.env = env_class(trials)
        self.num_eval_rounds = trials

    def evaluate(self, nets, render=False):
        """Play games with randomly sampled actions until the round budget is spent.

        NOTE(review): ``nets`` and ``render`` are not read here and the
        returned fitness is always 0 - presumably a work in progress; confirm.
        """
        from game import Game

        table = Game(10000)
        table.start_game(1, [2, 3, 4, 5, 6])
        while table.total_rounds_so_far < self.num_eval_rounds:
            for _ in table.game_loop():
                # Sample the move: 0 = check/call, 1 = bet/raise/all-in, 2 = fold.
                move = int(np.random.choice([0, 1, 2], 1, p=[0.5, 0.15, 0.35])[0])
                if move == 0:
                    table._bot_check_call()
                    continue
                if move != 1:
                    table._bot_fold()
                    continue
                high = table.bots[table.bot_to_act].stack + table.bots[table.bot_to_act].bet
                low = table.current_bet + table.raise_min
                if high > low:
                    table._bot_bet(randrange(low, high))
                else:
                    # Not enough chips for a legal raise: go all-in.
                    table._bot_bet(high)
            table.new_game()
            print("new game")
        print("all rounds evaluated")
        return 0
class EnvironmentClassification(Environment):
    """Classification environments (supervised learning with labelled dataset (lifetime learning))."""
    # TODO !!! be careful of overfitting during evolution consider creating a probability distribution of the dataset and sampling from that
    # TODO also reset weights before training and evaluating on new test data

    def __init__(self, features, labels, gradient_based_learning=False, feature_dims=None):
        """Store the labelled samples used to score networks.

        Args:
            features: samples iterated over in ``evaluate``; the last row of
                each sample is what gets passed to the network.
            labels: labels indexed in step with ``features``.
            gradient_based_learning: when not ``False``, ``evaluate`` skips
                the prediction loop.
            feature_dims: list of feature dimension objects; a new empty list
                is used when omitted.
        """
        # Never hand a shared default list to the base class.
        super().__init__([] if feature_dims is None else feature_dims)
        self.net = None  # Neural network to evaluate
        self.features = features
        self.labels = labels
        self.gradient_based_learning = gradient_based_learning

    @staticmethod
    def load_dataset(dataset_file, batch_size=128):
        """Load a labelled dataset; this occurs on the master process.

        Reads ``./datasets/<dataset_file>`` as CSV, builds the samples with
        ``get_features`` and randomly holds out two disjoint batches of
        ``batch_size`` samples; the remainder is the training set.

        Returns:
            ``(test_features, test_labels, val_features, val_labels,
            train_features, train_labels)``.

        Raises:
            ValueError: if the dataset has fewer than ``2 * batch_size``
                samples.
        """
        # pandas is imported lazily and bound at module scope.
        global pd
        import pandas as pd

        data = pd.read_csv("./datasets/" + dataset_file)
        features, labels = EnvironmentClassification.get_features(data)
        held_out = batch_size * 2
        if held_out > len(labels):
            raise ValueError(
                "dataset has %d samples but %d are needed for the test and validation batches"
                % (len(labels), held_out)
            )
        inds = random.sample(range(len(labels)), held_out)
        test_inds = inds[:batch_size]
        val_inds = inds[batch_size:]
        test_features = features[test_inds]
        test_labels = labels[test_inds]
        val_features = features[val_inds]
        val_labels = labels[val_inds]
        train_features = np.delete(features, inds, 0)
        train_labels = np.delete(labels, inds, 0)
        return test_features, test_labels, val_features, val_labels, train_features, train_labels

    @staticmethod
    def get_features(data):
        """Build two-row samples and one-hot labels from a data frame.

        Columns 3-9 of ``data`` are the features (BodySpeed, EEGAB, EYEDwell,
        EYEScan, EyesOffScreen, PressesCount, SingleFastPresses). The
        "Targets" column holds the labels, with "correct" mapped to 1 and
        "mistake" to 0. Sample ``i`` stacks rows ``i`` and ``i + 1`` and takes
        the label of row ``i + 1``.

        Returns:
            ``(features, labels)`` with shapes ``(rows - 1, 2, 7)`` and
            ``(rows - 1, 2)``; the labels are one-hot encoded.
        """
        feature_columns = np.array(data.columns.values)[[3, 4, 5, 6, 7, 8, 9]]
        features = data[feature_columns].values
        # Work on a copy: the array behind the column may be shared with the
        # frame (or be read-only), and the caller's data must stay untouched.
        labels = data["Targets"].values.copy()
        labels[labels == "correct"] = 1
        labels[labels == "mistake"] = 0
        # Pair every row with its successor in one vectorised step rather
        # than growing the arrays row by row.
        windows = np.stack((features[:-1], features[1:]), axis=1)
        # Promote to at least float64 so integer columns come back as floats.
        windows = windows.astype(np.result_type(np.float64, features.dtype), copy=False)
        targets = labels[1:].astype(int)
        return windows, np.eye(2)[targets]  # one hot encoding

    def evaluate(self, net):
        """Evaluate the neural net, perform any lifetime learning.

        Counts true/false positives and negatives from ``net.predict`` over
        every stored sample, scores them with ``net.auc`` and records the
        result on the network and its genome. When
        ``gradient_based_learning`` is not ``False`` no predictions are made
        and the score is computed from all-zero counts.

        Returns:
            -9999 for void networks, otherwise the value of ``net.auc``.
        """
        if net.is_void:
            return -9999  # return low fitness for void networks
        self.net = net
        tp = 0
        fn = 0
        tn = 0
        fp = 0
        if self.gradient_based_learning is False:
            for i, sample in enumerate(self.features):
                # Only the most recent row of each sample is fed forward.
                y, y_norm, y_arg, y_arg_true = net.predict(sample[-1], self.labels[i])
                if y_arg == y_arg_true:  # TP or TN
                    if y_arg == 1:
                        tp += 1
                    else:
                        tn += 1
                elif y_arg == 0:  # FN
                    fn += 1
                else:  # FP
                    fp += 1
        fitness = net.auc(tp, tn, fp, fn)
        self.net.set_fitness(fitness)
        self.calc_performance_dims(self.net)
        self.calc_phenotypic_dims(self.net)
        self.calc_genomic_dims(self.net.genome)
        net.genome.performance_dims = self.performance_dims
        net.genome.phenotypic_dims = self.phenotypic_dims
        net.genome.genomic_dims = self.genomic_dims
        return fitness
def weighted_categorical_crossentropy(y_true, y_pred):
    """Weighted categorical crossentropy score.

    Each class column is weighted by the per-column frequencies of ``y_true``
    taken in reversed column order. The result is the mean over samples of
    ``1 + sum(weighted y_true * log(y_pred))``.
    """
    tiny = np.finfo(float).eps
    # Keep predictions strictly inside (0, 1) so the log stays finite.
    clipped = np.clip(y_pred, tiny, 1 - tiny)
    column_frequency = np.sum(y_true, axis=0) / len(y_true)
    weights = np.flip(column_frequency)
    weighted_log = (y_true * np.log(clipped)) * weights
    per_sample = 1 + np.sum(weighted_log, -1)
    return np.sum(per_sample) / len(weighted_log)
def mcc(tp, tn, fp, fn):
    """Matthews Correlation Coefficient for imbalanced classification.

    Note: the author's experiments showed it does not balance the measurements
    fairly, scoring classes with fewer samples lower at the same accuracy;
    ROC-AUC is recommended instead because it does balance.
    """
    numerator = (tp * tn) - (fp * fn)
    denominator = math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    if denominator == 0:
        # Avoid dividing by zero when any marginal count is empty.
        denominator = sys.float_info.epsilon
    return numerator / denominator