From 2630f04a4ce28bcc5f38a173bd1dffd39032ec6c Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 23 Apr 2022 05:43:45 +0900 Subject: [PATCH 01/22] feature: add google research football environment --- handyrl/envs/gfootball.py | 674 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 674 insertions(+) create mode 100644 handyrl/envs/gfootball.py diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py new file mode 100644 index 00000000..f7e1dca1 --- /dev/null +++ b/handyrl/envs/gfootball.py @@ -0,0 +1,674 @@ +import random +import copy + +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from handyrl.environment import BaseEnvironment + + + +class FootballNet(nn.Module): + class FootballHead(nn.Module): + def __init__(self, units0, units1): + super().__init__() + self.fc = nn.Linear(units0, units1) + self.bn = nn.BatchNorm1d(units1) + self.head_p = nn.Linear(units1, 19, bias=False) + self.head_v = nn.Linear(units1, 1, bias=False) + self.head_r = nn.Linear(units1, 1, bias=False) + + def forward(self, x): + h = F.relu_(self.bn(self.fc(x))) + p = self.head_p(h) + v = self.head_v(h) + r = self.head_r(h) + return {'policy': p, 'value': v, 'return': r} + + class CNNModel(nn.Module): + def __init__(self, final_filters): + super().__init__() + self.conv1 = nn.Sequential( + nn.Conv2d(53, 128, kernel_size=1, stride=1, bias=False), + nn.ReLU(inplace=True), + nn.Conv2d(128, 160, kernel_size=1, stride=1, bias=False), + nn.ReLU(inplace=True), + nn.Conv2d(160, 128, kernel_size=1, stride=1, bias=False), + nn.ReLU(inplace=True) + ) + self.pool1 = nn.AdaptiveAvgPool2d((1, 11)) + self.conv2 = nn.Sequential( + nn.BatchNorm2d(128), + nn.Conv2d(128, 160, kernel_size=(1, 1), stride=1, bias=False), + nn.ReLU(inplace=True), + nn.BatchNorm2d(160), + nn.Conv2d(160, 96, kernel_size=(1, 1), stride=1, bias=False), + nn.ReLU(inplace=True), + nn.BatchNorm2d(96), + nn.Conv2d(96, final_filters, kernel_size=(1, 1), stride=1, bias=False), + nn.ReLU(inplace=True), + nn.BatchNorm2d(final_filters), + ) + self.pool2 = nn.AdaptiveAvgPool2d((1, 1)) + self.flatten = nn.Flatten() + + def forward(self, x): + x = x['cnn_feature'] + x = self.conv1(x) + x = self.pool1(x) + x = self.conv2(x) + x = self.pool2(x) + x = self.flatten(x) + return x + + class ActionHistoryEncoder(nn.Module): + def __init__(self, hidden_size=64, num_layers=2): + super().__init__() + self.action_emd = nn.Embedding(19, 8) + self.rnn = nn.GRU(8, hidden_size, num_layers, batch_first=True) + + def forward(self, x): + h = self.action_emd(x['action_history']) + h = h.squeeze(dim=2) + self.rnn.flatten_parameters() + h, _ = self.rnn(h) + return h + + def __init__(self): + super().__init__() + + self.cnn = self.CNNModel(64) # to control + self.rnn = self.ActionHistoryEncoder(64, 2) + self.head = self.FootballHead(157, 64) + + def forward(self, x, hidden): + cnn_h = self.cnn(x) + rnn_h = self.rnn(x) + + h = torch.cat([ + cnn_h.view(cnn_h.size(0), -1), + rnn_h[:, -1, :], + x['ball'], + x['match'], + x['control']], -1) + o = self.head(h) + + return o + + +# feature +def feature_from_states(states, info, number): + # observation list to input tensor + + HISTORY_LENGTH = 8 + + obs_history_ = [s['observation'][number] for s in reversed(states[-HISTORY_LENGTH:])] + obs_history = obs_history_ + [obs_history_[-1]] * (HISTORY_LENGTH - len(obs_history_)) + obs = obs_history[0] + + action_history_ = [s['action'][number] for s in reversed(states[-HISTORY_LENGTH:])] + action_history = action_history_ + [0] * (HISTORY_LENGTH - 
len(action_history_ )) + + """ + ・left players (x) + ・left players (y) + ・right players (x) + ・right players (y) + ・ball (x) + ・ball (y) + ・left goal (x) + ・left goal (y) + ・right goal (x) + ・right goal (y) + ・active (x) + ・active (y) + + ・left players (x) - right players (x) + ・left players (y) - right players (y) + ・left players (x) - ball (x) + ・left players (y) - ball (y) + ・left players (x) - goal (x) + ・left players (y) - goal (y) + ・left players (x) - active (x) + ・left players (y) - active (y) + + ・left players direction (x) + ・left players direction (y) + ・right players direction (x) + ・right players direction (y) + ・left players direction (x) - right players direction (x) + ・left players direction (y) - right players direction (y) + """ + + # left players + obs_left_team = np.array(obs['left_team']) + left_player_x = np.repeat(obs_left_team[:, 0][..., None], 11, axis=1) + left_player_y = np.repeat(obs_left_team[:, 1][..., None], 11, axis=1) + + # right players + obs_right_team = np.array(obs['right_team']) + right_player_x = np.repeat(obs_right_team[:, 0][..., None], 11, axis=1).transpose(1, 0) + right_player_y = np.repeat(obs_right_team[:, 1][..., None], 11, axis=1).transpose(1, 0) + + # ball + obs_ball = np.array(obs['ball']) + ball_x = np.ones((11, 11)) * obs_ball[0] + ball_y = np.ones((11, 11)) * obs_ball[1] + ball_z = np.ones((11, 11)) * obs_ball[2] + + # goal + left_goal, right_goal = [-1, 0], [1, 0] + left_goal_x = np.ones((11, 11)) * left_goal[0] + left_goal_y = np.ones((11, 11)) * left_goal[1] + right_goal_x = np.ones((11, 11)) * right_goal[0] + right_goal_y = np.ones((11, 11)) * right_goal[1] + + # side line + side_line_y = [-.42, .42] + side_line_y_top = np.ones((11, 11)) * side_line_y[0] + side_line_y_bottom = np.ones((11, 11)) * side_line_y[1] + + # active + active = np.array(obs['active']) + active_player_x = np.repeat(obs_left_team[active][0][..., None, None], 11, axis=1).repeat(11, axis=0) + active_player_y = np.repeat(obs_left_team[active][1][..., None, None], 11, axis=1).repeat(11, axis=0) + + # left players - right players + left_minus_right_player_x = obs_left_team[:, 0][..., None] - obs_right_team[:, 0] + left_minus_right_player_y = obs_left_team[:, 1][..., None] - obs_right_team[:, 1] + + # left players - ball + left_minus_ball_x = (obs_left_team[:, 0][..., None] - obs_ball[0]).repeat(11, axis=1) + left_minus_ball_y = (obs_left_team[:, 1][..., None] - obs_ball[1]).repeat(11, axis=1) + + # left players - right goal + left_minus_right_goal_x = (obs_left_team[:, 0][..., None] - right_goal[0]).repeat(11, axis=1) + left_minus_right_goal_y = (obs_left_team[:, 1][..., None] - right_goal[1]).repeat(11, axis=1) + + # left players - left goal + left_minus_left_goal_x = (obs_left_team[:, 0][..., None] - left_goal[0]).repeat(11, axis=1) + left_minus_left_goal_y = (obs_left_team[:, 1][..., None] - left_goal[1]).repeat(11, axis=1) + + # right players - right goal + right_minus_right_goal_x = (obs_right_team[:, 0][..., None] - right_goal[0]).repeat(11, axis=1).transpose(1, 0) + right_minus_right_goal_y = (obs_right_team[:, 1][..., None] - right_goal[1]).repeat(11, axis=1).transpose(1, 0) + + # right players - left goal + right_minus_left_goal_x = (obs_right_team[:, 0][..., None] - left_goal[0]).repeat(11, axis=1).transpose(1, 0) + right_minus_left_goal_y = (obs_right_team[:, 1][..., None] - left_goal[1]).repeat(11, axis=1).transpose(1, 0) + + # left players (x) - active + left_minus_active_x = (obs_left_team[:, 0][..., None] - obs_left_team[active][0]).repeat(11, axis=1) + 
left_minus_active_y = (obs_left_team[:, 1][..., None] - obs_left_team[active][1]).repeat(11, axis=1) + + # right player - ball + right_minus_ball_x = (obs_right_team[:, 0][..., None] - obs_ball[0]).repeat(11, axis=1).transpose(1, 0) + right_minus_ball_y = (obs_right_team[:, 1][..., None] - obs_ball[1]).repeat(11, axis=1).transpose(1, 0) + + # right player - active + right_minus_active_x = (obs_right_team[:, 0][..., None] - obs_left_team[active][0]).repeat(11, axis=1).transpose(1, 0) + right_minus_active_y = (obs_right_team[:, 1][..., None] - obs_left_team[active][1]).repeat(11, axis=1).transpose(1, 0) + + # left player - side line + left_minus_side_top = np.abs(obs_left_team[:, 1][..., None] - side_line_y[0]).repeat(11, axis=1) + left_minus_side_bottom = np.abs(obs_left_team[:, 1][..., None] - side_line_y[1]).repeat(11, axis=1) + + # right player - side line + right_minus_side_top = np.abs(obs_right_team[:, 1][..., None] - side_line_y[0]).repeat(11, axis=1).transpose(1, 0) + right_minus_side_bottom = np.abs(obs_right_team[:, 1][..., None] - side_line_y[1]).repeat(11, axis=1).transpose(1, 0) + + # left players direction + obs_left_team_direction = np.array(obs['left_team_direction']) + left_player_direction_x = np.repeat(obs_left_team_direction[:, 0][..., None], 11, axis=1) + left_player_direction_y = np.repeat(obs_left_team_direction[:, 1][..., None], 11, axis=1) + + # right players direction + obs_right_team_direction = np.array(obs['right_team_direction']) + right_player_direction_x = np.repeat(obs_right_team_direction[:, 0][..., None], 11, axis=1).transpose(1, 0) + right_player_direction_y = np.repeat(obs_right_team_direction[:, 1][..., None], 11, axis=1).transpose(1, 0) + + # ball direction + obs_ball_direction = np.array(obs['ball_direction']) + ball_direction_x = np.ones((11, 11)) * obs_ball_direction[0] + ball_direction_y = np.ones((11, 11)) * obs_ball_direction[1] + ball_direction_z = np.ones((11, 11)) * obs_ball_direction[2] + + # left players direction - right players direction + left_minus_right_player_direction_x = obs_left_team_direction[:, 0][..., None] - obs_right_team_direction[:, 0] + left_minus_right_player_direction_y = obs_left_team_direction[:, 1][..., None] - obs_right_team_direction[:, 1] + + # left players direction - ball direction + left_minus_ball_direction_x = (obs_left_team_direction[:, 0][..., None] - obs_ball_direction[0]).repeat(11, axis=1) + left_minus_ball_direction_y = (obs_left_team_direction[:, 1][..., None] - obs_ball_direction[1]).repeat(11, axis=1) + + # right players direction - ball direction + right_minus_ball_direction_x = (obs_right_team_direction[:, 0][..., None] - obs_ball_direction[0]).repeat(11, axis=1).transpose(1, 0) + right_minus_ball_direction_y = (obs_right_team_direction[:, 1][..., None] - obs_ball_direction[1]).repeat(11, axis=1).transpose(1, 0) + + # ball rotation + obs_ball_rotation = np.array(obs['ball_rotation']) + ball_rotation_x = np.ones((11, 11)) * obs_ball_rotation[0] + ball_rotation_y = np.ones((11, 11)) * obs_ball_rotation[1] + ball_rotation_z = np.ones((11, 11)) * obs_ball_rotation[2] + + cnn_feature = np.stack([ + left_player_x, + left_player_y, + right_player_x, + right_player_y, + ball_x, + ball_y, + ball_z, + left_goal_x, + left_goal_y, + right_goal_x, + right_goal_y, + side_line_y_top, + side_line_y_bottom, + active_player_x, + active_player_y, + left_minus_right_player_x, + left_minus_right_player_y, + left_minus_right_goal_x, + left_minus_right_goal_y, + left_minus_left_goal_x, + left_minus_left_goal_y, + 
right_minus_right_goal_x, + right_minus_right_goal_y, + right_minus_left_goal_x, + right_minus_left_goal_y, + left_minus_side_top, + left_minus_side_bottom, + right_minus_side_top, + right_minus_side_bottom, + right_minus_ball_x, + right_minus_ball_y, + right_minus_active_x, + right_minus_active_y, + left_minus_ball_x, + left_minus_ball_y, + left_minus_active_x, + left_minus_active_y, + ball_direction_x, + ball_direction_y, + ball_direction_z, + left_minus_ball_direction_x, + left_minus_ball_direction_y, + right_minus_ball_direction_x, + right_minus_ball_direction_y, + left_player_direction_x, + left_player_direction_y, + right_player_direction_x, + right_player_direction_y, + left_minus_right_player_direction_x, + left_minus_right_player_direction_y, + ball_rotation_x, + ball_rotation_y, + ball_rotation_z, + ], axis=0).astype(np.float32) + + # ball + BALL_OWEND_1HOT = {-1: [0, 0], 0: [1, 0], 1: [0, 1]} + ball_owned_team_ = obs['ball_owned_team'] + ball_owned_team = BALL_OWEND_1HOT[ball_owned_team_] # {-1, 0, 1} None, self, opponent + PLAYER_1HOT = np.concatenate([np.eye(11), np.zeros((1, 11))]) + ball_owned_player_ = PLAYER_1HOT[obs['ball_owned_player']] # {-1, N-1} + if ball_owned_team_ == -1: + my_ball_owned_player = PLAYER_1HOT[-1] + op_ball_owned_player = PLAYER_1HOT[-1] + elif ball_owned_team_ == 0: + my_ball_owned_player = ball_owned_player_ + op_ball_owned_player = PLAYER_1HOT[-1] + else: + my_ball_owned_player = PLAYER_1HOT[-1] + op_ball_owned_player = ball_owned_player_ + + ball_features = np.concatenate([ + obs['ball'], + obs['ball_direction'], + obs['ball_rotation'] + ]).astype(np.float32) + + # self team + left_team_features = np.concatenate([ + [[1] for _ in obs['left_team']], # left team flag + obs['left_team'], # position + obs['left_team_direction'], + [[v] for v in obs['left_team_tired_factor']], + [[v] for v in obs['left_team_yellow_card']], + [[v] for v in obs['left_team_active']], + my_ball_owned_player[...,np.newaxis] + ], axis=1).astype(np.float32) + + left_team_indice = np.arange(0, 11, dtype=np.int32) + + # opponent team + right_team_features = np.concatenate([ + [[0] for _ in obs['right_team']], # right team flag + obs['right_team'], # position + obs['right_team_direction'], + [[v] for v in obs['right_team_tired_factor']], + [[v] for v in obs['right_team_yellow_card']], + [[v] for v in obs['right_team_active']], + op_ball_owned_player[...,np.newaxis] + ], axis=1).astype(np.float32) + + right_team_indice = np.arange(0, 11, dtype=np.int32) + + # distance information + def get_distance(xy1, xy2): + return (((xy1 - xy2) ** 2).sum(axis=-1)) ** 0.5 + + def get_line_distance(x1, x2): + return np.abs(x1 - x2) + + def multi_scale(x, scale): + return 2 / (1 + np.exp(-np.array(x)[..., np.newaxis] / np.array(scale))) + + both_team = np.array(obs['left_team'] + obs['right_team'], dtype=np.float32) + ball = np.array([obs['ball'][:2]], dtype=np.float32) + goal = np.array([[-1, 0], [1, 0]], dtype=np.float32) + goal_line_x = np.array([-1, 1], dtype=np.float32) + side_line_y = np.array([-.42, .42], dtype=np.float32) + + # ball <-> goal, goal line, side line distance + b2g_distance = get_distance(ball, goal) + b2gl_distance = get_line_distance(ball[0][0], goal_line_x) + b2sl_distance = get_line_distance(ball[0][1], side_line_y) + b2o_distance = np.concatenate([ + b2g_distance, b2gl_distance, b2sl_distance + ], axis=-1) + + # player <-> ball, goal, back line, side line distance + p2b_distance = get_distance(both_team[:,np.newaxis,:], ball[np.newaxis,:,:]) + p2g_distance = 
get_distance(both_team[:,np.newaxis,:], goal[np.newaxis,:,:]) + p2gl_distance = get_line_distance(both_team[:,:1], goal_line_x[np.newaxis,:]) + p2sl_distance = get_line_distance(both_team[:,1:], side_line_y[np.newaxis,:]) + p2bo_distance = np.concatenate([ + p2b_distance, p2g_distance, p2gl_distance, p2sl_distance + ], axis=-1) + + # player <-> player distance + p2p_distance = get_distance(both_team[:,np.newaxis,:], both_team[np.newaxis,:,:]) + + # controlled player information + control_flag_ = np.array(PLAYER_1HOT[obs['active']], dtype=np.float32) + control_flag = np.concatenate([control_flag_, np.zeros(len(obs['right_team']), dtype=np.float32)])[...,np.newaxis] + + # controlled status information + DIR = [ + [-1, 0], [-.707, -.707], [0, 1], [ .707, -.707], # L, TL, T, TR + [ 1, 0], [ .707, .707], [0, -1], [-.707, .707] # R, BR, B, BL + ] + + sticky_direction = DIR[np.where(obs['sticky_actions'][:8] == 1)[0][0]] if 1 in obs['sticky_actions'][:8] else [0, 0] + sticky_flags = obs['sticky_actions'][8:] + + control_features = np.concatenate([ + sticky_direction, + sticky_flags, + ]).astype(np.float32) + + # Match state + if obs['steps_left'] > info['half_step']: + steps_left_half = obs['steps_left'] - info['half_step'] + else: + steps_left_half = obs['steps_left'] + match_features = np.concatenate([ + multi_scale(obs['score'], [1, 3]).ravel(), + multi_scale(obs['score'][0] - obs['score'][1], [1, 3]), + multi_scale(obs['steps_left'], [10, 100, 1000, 10000]), + multi_scale(steps_left_half, [10, 100, 1000, 10000]), + ball_owned_team, + ]).astype(np.float32) + + mode_index = np.array([obs['game_mode']], dtype=np.int32) + + action_history = np.array(action_history, dtype=np.int32)[..., None] + + return { + # features + 'ball': ball_features, + 'match': match_features, + 'player': { + 'self': left_team_features, + 'opp': right_team_features + }, + 'control': control_features, + 'player_index': { + 'self': left_team_indice, + 'opp': right_team_indice + }, + 'mode_index': mode_index, + 'control_flag': control_flag, + # distances + 'distance': { + 'p2p': p2p_distance, + 'p2bo': p2bo_distance, + 'b2o': b2o_distance + }, + # CNN + 'cnn_feature': cnn_feature, + 'action_history': action_history + } + + +# https://github.com/Kaggle/kaggle-environments/blob/master/kaggle_environments/envs/football/helpers.py + +import enum + +class Action(enum.IntEnum): + Idle = 0 + Left = 1 + TopLeft = 2 + Top = 3 + TopRight = 4 + Right = 5 + BottomRight = 6 + Bottom = 7 + BottomLeft = 8 + LongPass= 9 + HighPass = 10 + ShortPass = 11 + Shot = 12 + Sprint = 13 + ReleaseDirection = 14 + ReleaseSprint = 15 + Slide = 16 + Dribble = 17 + ReleaseDribble = 18 + +sticky_index_to_action = [ + Action.Left, + Action.TopLeft, + Action.Top, + Action.TopRight, + Action.Right, + Action.BottomRight, + Action.Bottom, + Action.BottomLeft, + Action.Sprint, + Action.Dribble +] + +action_to_sticky_index = { + a: index for index, a in enumerate(sticky_index_to_action) +} + +class PlayerRole(enum.IntEnum): + GoalKeeper = 0 + CenterBack = 1 + LeftBack = 2 + RightBack = 3 + DefenceMidfield = 4 + CentralMidfield = 5 + LeftMidfield = 6 + RIghtMidfield = 7 + AttackMidfield = 8 + CentralFront = 9 + + +class GameMode(enum.IntEnum): + Normal = 0 + KickOff = 1 + GoalKick = 2 + FreeKick = 3 + Corner = 4 + ThrowIn = 5 + Penalty = 6 + + +class Environment(BaseEnvironment): + ACTION_LEN = 19 + CONTROLLED_PLAYERS = 1 + + def __init__(self, args=None): + self.env = None + args = args if args is not None else {} + self.limit_step = args.get('limit_step', 600) 
+ self.controlled_players = 1 + + def reset(self, args=None): + if self.env is None: + from gfootball.env import create_environment + + self.env = create_environment( + env_name="11_vs_11_stochastic", + representation='raw', + number_of_left_players_agent_controls=self.CONTROLLED_PLAYERS, + number_of_right_players_agent_controls=self.CONTROLLED_PLAYERS) + + obs = self.env.reset() + self.update({'observation': obs, 'action': [0] * self.CONTROLLED_PLAYERS * 2}, reset=True) + + def update(self, state, reset): + if reset: + self.done = False + self.prev_score = [0, 0] + self.states = [] + self.half_step = 1500 + self.reserved_action = [None, None] + else: + self.prev_score = self.score() + + state = copy.deepcopy(state) + state = self._preprocess_state(state) + self.states.append(state) + + if reset: + self.half_step = state['observation'][0]['steps_left'] // 2 + + def step(self, actions): + # state transition function + # action is integer (0 ~ 18) + actions = copy.deepcopy(actions) + for i, res_action in enumerate(self.reserved_action): + if res_action is not None: + actions[i] = res_action + + # step environment + flat_actions = [actions[0], actions[1]] + obs, _, self.done, _ = self.env.step(flat_actions) + self.update({'observation': obs, 'action': flat_actions}, reset=False) + + def diff_info(self): + return self.states[-1] + + def turns(self): + return self.players() + + def players(self): + return [0, 1] + + def terminal(self): + # check whether the state is terminal + return self.done \ + or len(self.states) > self.limit_step \ + or sum(self.score().values()) > 0 # finish after first goal + + def score(self): + if len(self.states) == 0: + return [0, 0] + state = self.states[-1] + return {p: state['observation'][0]['score'][p] for p in self.players()} + + def reward(self): + prev_score = self.prev_score + score = self.score() + + rewards = {} + for p in self.players(): + r = 1.0 * (score[p] - prev_score[p]) - 1.0 * (score[1 - p] - prev_score[1 - p]) + rewards[p] = r + + return rewards + + def outcome(self): + scores = self.score() + if scores[0] > scores[1]: + return [1, -1] + elif scores[0] < scores[1]: + return [-1, 1] + return [0, 0] + + def legal_actions(self, player, number=0): + # legal action list + return list(range(self.ACTION_LEN)) + + def raw_observation(self, player): + return self.states[-1]['observation'][player] + + def observation(self, player, number=0): + # input feature for neural nets + info = {'half_step': self.half_step} + return feature_from_states(self.states, info, player * self.CONTROLLED_PLAYERS + number) + + def _preprocess_state(self, state): + if state is None: + return state + + # in ball-dead state, set ball owned player and team + for o in state['observation']: + mode = o['game_mode'] + if mode == GameMode.FreeKick or \ + mode == GameMode.Corner or \ + mode == GameMode.Penalty or \ + mode == GameMode.GoalKick: + # find nearest player and team + def dist(xy1, xy2): + return ((xy1[0] - xy2[0]) ** 2 + (xy1[1] - xy2[1]) ** 2) ** 0.5 + team_player_position = [(0, i, p) for i, p in enumerate(o['left_team'])] + \ + [(1, i, p) for i, p in enumerate(o['right_team'])] + distances = [(t[0], t[1], dist(t[2], o['ball'][:2])) for t in team_player_position] + distances = sorted(distances, key=lambda x: x[2]) + + o['ball_owned_team'] = distances[0][0] + o['ball_owned_player'] = distances[0][1] + + return state + + def net(self): + return FootballNet() + + +if __name__ == '__main__': + e = Environment() + for _ in range(1): + e.reset() + o = e.observation(0) + while not 
e.terminal():
+            # print(e)
+            _ = e.observation(0)
+            _ = e.observation(1)
+            #print(e.raw_observation(0)[0]['steps_left'])
+            action_list = [0, 0]
+            action_list[0] = random.choice(e.legal_actions(0))
+            action_list[1] = random.choice(e.legal_actions(1))
+            print(len(e.states), action_list)
+            e.step(action_list)
+        print(e.reward())
+        print(e.score())
+        print(e.outcome())

From adf111fa8a21a725f5210427dc0ccf5274736aee Mon Sep 17 00:00:00 2001
From: YuriCat
Date: Sat, 23 Apr 2022 06:05:18 +0900
Subject: [PATCH 02/22] feature: set action set=v2

---
 handyrl/envs/gfootball.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py
index f7e1dca1..67e73d9d 100644
--- a/handyrl/envs/gfootball.py
+++ b/handyrl/envs/gfootball.py
@@ -524,6 +524,7 @@ class GameMode(enum.IntEnum):
 class Environment(BaseEnvironment):
     ACTION_LEN = 19
     CONTROLLED_PLAYERS = 1
+    FINISH_BY_GOAL = True
 
     def __init__(self, args=None):
         self.env = None
@@ -539,7 +540,8 @@ def reset(self, args=None):
             env_name="11_vs_11_stochastic",
             representation='raw',
             number_of_left_players_agent_controls=self.CONTROLLED_PLAYERS,
-            number_of_right_players_agent_controls=self.CONTROLLED_PLAYERS)
+            number_of_right_players_agent_controls=self.CONTROLLED_PLAYERS,
+            other_config_options={'action_set': 'v2'})
 
         obs = self.env.reset()
         self.update({'observation': obs, 'action': [0] * self.CONTROLLED_PLAYERS * 2}, reset=True)
@@ -587,7 +589,7 @@ def terminal(self):
         # check whether the state is terminal
         return self.done \
             or len(self.states) > self.limit_step \
-            or sum(self.score().values()) > 0 # finish after first goal
+            or (self.FINISH_BY_GOAL and sum(self.score().values()) > 0)
 
     def score(self):
         if len(self.states) == 0:
@@ -609,10 +611,10 @@ def reward(self):
     def outcome(self):
         scores = self.score()
         if scores[0] > scores[1]:
-            return [1, -1]
+            return {0: 1, 1: -1}
         elif scores[0] < scores[1]:
-            return [-1, 1]
-        return [0, 0]
+            return {0: -1, 1: 1}
+        return {0: 0, 1: 0}
 
     def legal_actions(self, player, number=0):
         # legal action list
@@ -666,9 +668,8 @@ def net(self):
         #print(e.raw_observation(0)[0]['steps_left'])
         action_list = [0, 0]
         action_list[0] = random.choice(e.legal_actions(0))
-        action_list[1] = random.choice(e.legal_actions(1))
+        action_list[1] = 19
         print(len(e.states), action_list)
         e.step(action_list)
-        print(e.reward())
-        print(e.score())
+        print(e.score())
 print(e.outcome())

From 2aaf70a22babea7a01d44b9d1b1b817a85e825c4 Mon Sep 17 00:00:00 2001
From: YuriCat
Date: Sat, 23 Apr 2022 06:34:50 +0900
Subject: [PATCH 03/22] feature: add rulebase action (builtin ai)

---
 handyrl/envs/gfootball.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py
index 67e73d9d..140799f0 100644
--- a/handyrl/envs/gfootball.py
+++ b/handyrl/envs/gfootball.py
@@ -652,6 +652,9 @@ def dist(xy1, xy2):
 
         return state
 
+    def rule_based_action(self, player=None, number=0):
+        return 19
+
     def net(self):
         return FootballNet()
 

From 473b047c919c3c99a04ed7ce88dc2202513ea68f Mon Sep 17 00:00:00 2001
From: YuriCat
Date: Sat, 23 Apr 2022 06:57:36 +0900
Subject: [PATCH 04/22] feature: rulebase agents in football environment

---
 handyrl/agent.py          |  5 ++++-
 handyrl/envs/gfootball.py | 12 ++++++++++--
 handyrl/evaluation.py     |  5 +++--
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/handyrl/agent.py b/handyrl/agent.py
index cbb5c961..71a2dde6 100755
--- a/handyrl/agent.py
+++ b/handyrl/agent.py
@@ -23,9 +23,12 @@ def observe(self, env, player, show=False):
class RuleBasedAgent(RandomAgent): + def __init__(self, key=None): + self.key = None + def action(self, env, player, show=False): if hasattr(env, 'rule_based_action'): - return env.rule_based_action(player) + return env.rule_based_action(player, key=self.key) else: return random.choice(env.legal_actions(player)) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 140799f0..8f3f0881 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -652,8 +652,16 @@ def dist(xy1, xy2): return state - def rule_based_action(self, player=None, number=0): - return 19 + def rule_based_action(self, player=None, number=0, key=None): + if key is None: + key = 'builtin_ai' + + if key == 'builtin_ai': + return 19 + elif key == 'idle': + return 14 + elif key == 'right': + return 5 def net(self): return FootballNet() diff --git a/handyrl/evaluation.py b/handyrl/evaluation.py index ad770e30..4bb28c16 100755 --- a/handyrl/evaluation.py +++ b/handyrl/evaluation.py @@ -143,8 +143,9 @@ def exec_network_match(env, network_agents, critic=None, show=False, game_args={ def build_agent(raw, env=None): if raw == 'random': return RandomAgent() - elif raw == 'rulebase': - return RuleBasedAgent() + elif raw.startswith('rulebase'): + key = rulebase.split('-')[1] if '-' in raw else None + return RuleBasedAgent(key) return None From 4e01997f43f28dfef5324559ef341ccec319559c Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 23 Apr 2022 06:59:08 +0900 Subject: [PATCH 05/22] fix: rulebase agent key --- handyrl/evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handyrl/evaluation.py b/handyrl/evaluation.py index 4bb28c16..720ea5f9 100755 --- a/handyrl/evaluation.py +++ b/handyrl/evaluation.py @@ -144,7 +144,7 @@ def build_agent(raw, env=None): if raw == 'random': return RandomAgent() elif raw.startswith('rulebase'): - key = rulebase.split('-')[1] if '-' in raw else None + key = raw.split('-')[1] if '-' in raw else None return RuleBasedAgent(key) return None From 306f20c482942433de7499b416039f694a8f4f57 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 23 Apr 2022 07:06:49 +0900 Subject: [PATCH 06/22] fix: rulebase key --- handyrl/agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handyrl/agent.py b/handyrl/agent.py index 71a2dde6..c2e0b78f 100755 --- a/handyrl/agent.py +++ b/handyrl/agent.py @@ -24,7 +24,7 @@ def observe(self, env, player, show=False): class RuleBasedAgent(RandomAgent): def __init__(self, key=None): - self.key = None + self.key = key def action(self, env, player, show=False): if hasattr(env, 'rule_based_action'): From 44a72621da04e3fba863272f640ebea3699b9905 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 23 Apr 2022 07:54:35 +0900 Subject: [PATCH 07/22] feature: cnn feature save memory --- handyrl/envs/gfootball.py | 170 +++++++++++++++++++++----------------- 1 file changed, 95 insertions(+), 75 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 8f3f0881..742b797d 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -57,6 +57,12 @@ def __init__(self, final_filters): def forward(self, x): x = x['cnn_feature'] + x = torch.cat([ + x['2d'], + x['left'].unsqueeze(-1).repeat(1, 1, 1, 11), + x['right'].unsqueeze(-2).repeat(1, 1, 11, 1), + x['scalar'].unsqueeze(-1).unsqueeze(-1).repeat(1, 1, 11, 11), + ], 1) x = self.conv1(x) x = self.pool1(x) x = self.conv2(x) @@ -145,120 +151,118 @@ def feature_from_states(states, info, number): # left players obs_left_team = 
np.array(obs['left_team']) - left_player_x = np.repeat(obs_left_team[:, 0][..., None], 11, axis=1) - left_player_y = np.repeat(obs_left_team[:, 1][..., None], 11, axis=1) + left_player_x = obs_left_team[:, 0] + left_player_y = obs_left_team[:, 1] # right players obs_right_team = np.array(obs['right_team']) - right_player_x = np.repeat(obs_right_team[:, 0][..., None], 11, axis=1).transpose(1, 0) - right_player_y = np.repeat(obs_right_team[:, 1][..., None], 11, axis=1).transpose(1, 0) + right_player_x = obs_right_team[:, 0] + right_player_y = obs_right_team[:, 1] # ball obs_ball = np.array(obs['ball']) - ball_x = np.ones((11, 11)) * obs_ball[0] - ball_y = np.ones((11, 11)) * obs_ball[1] - ball_z = np.ones((11, 11)) * obs_ball[2] + ball_x = obs_ball[0] + ball_y = obs_ball[1] + ball_z = obs_ball[2] # goal left_goal, right_goal = [-1, 0], [1, 0] - left_goal_x = np.ones((11, 11)) * left_goal[0] - left_goal_y = np.ones((11, 11)) * left_goal[1] - right_goal_x = np.ones((11, 11)) * right_goal[0] - right_goal_y = np.ones((11, 11)) * right_goal[1] + left_goal_x = left_goal[0] + left_goal_y = left_goal[1] + right_goal_x = right_goal[0] + right_goal_y = right_goal[1] # side line side_line_y = [-.42, .42] - side_line_y_top = np.ones((11, 11)) * side_line_y[0] - side_line_y_bottom = np.ones((11, 11)) * side_line_y[1] + side_line_y_top = side_line_y[0] + side_line_y_bottom = side_line_y[1] # active active = np.array(obs['active']) - active_player_x = np.repeat(obs_left_team[active][0][..., None, None], 11, axis=1).repeat(11, axis=0) - active_player_y = np.repeat(obs_left_team[active][1][..., None, None], 11, axis=1).repeat(11, axis=0) + active_player_x = obs_left_team[active][0] + active_player_y = obs_left_team[active][1] # left players - right players left_minus_right_player_x = obs_left_team[:, 0][..., None] - obs_right_team[:, 0] left_minus_right_player_y = obs_left_team[:, 1][..., None] - obs_right_team[:, 1] # left players - ball - left_minus_ball_x = (obs_left_team[:, 0][..., None] - obs_ball[0]).repeat(11, axis=1) - left_minus_ball_y = (obs_left_team[:, 1][..., None] - obs_ball[1]).repeat(11, axis=1) + left_minus_ball_x = obs_left_team[:, 0] - obs_ball[0] + left_minus_ball_y = obs_left_team[:, 1] - obs_ball[1] # left players - right goal - left_minus_right_goal_x = (obs_left_team[:, 0][..., None] - right_goal[0]).repeat(11, axis=1) - left_minus_right_goal_y = (obs_left_team[:, 1][..., None] - right_goal[1]).repeat(11, axis=1) + left_minus_right_goal_x = obs_left_team[:, 0] - right_goal[0] + left_minus_right_goal_y = obs_left_team[:, 1] - right_goal[1] # left players - left goal - left_minus_left_goal_x = (obs_left_team[:, 0][..., None] - left_goal[0]).repeat(11, axis=1) - left_minus_left_goal_y = (obs_left_team[:, 1][..., None] - left_goal[1]).repeat(11, axis=1) + left_minus_left_goal_x = obs_left_team[:, 0] - left_goal[0] + left_minus_left_goal_y = obs_left_team[:, 1] - left_goal[1] # right players - right goal - right_minus_right_goal_x = (obs_right_team[:, 0][..., None] - right_goal[0]).repeat(11, axis=1).transpose(1, 0) - right_minus_right_goal_y = (obs_right_team[:, 1][..., None] - right_goal[1]).repeat(11, axis=1).transpose(1, 0) + right_minus_right_goal_x = obs_right_team[:, 0] - right_goal[0] + right_minus_right_goal_y = obs_right_team[:, 1] - right_goal[1] # right players - left goal - right_minus_left_goal_x = (obs_right_team[:, 0][..., None] - left_goal[0]).repeat(11, axis=1).transpose(1, 0) - right_minus_left_goal_y = (obs_right_team[:, 1][..., None] - left_goal[1]).repeat(11, 
axis=1).transpose(1, 0) + right_minus_left_goal_x = obs_right_team[:, 0] - left_goal[0] + right_minus_left_goal_y = obs_right_team[:, 1] - left_goal[1] # left players (x) - active - left_minus_active_x = (obs_left_team[:, 0][..., None] - obs_left_team[active][0]).repeat(11, axis=1) - left_minus_active_y = (obs_left_team[:, 1][..., None] - obs_left_team[active][1]).repeat(11, axis=1) + left_minus_active_x = obs_left_team[:, 0] - obs_left_team[active][0] + left_minus_active_y = obs_left_team[:, 1] - obs_left_team[active][1] # right player - ball - right_minus_ball_x = (obs_right_team[:, 0][..., None] - obs_ball[0]).repeat(11, axis=1).transpose(1, 0) - right_minus_ball_y = (obs_right_team[:, 1][..., None] - obs_ball[1]).repeat(11, axis=1).transpose(1, 0) + right_minus_ball_x = obs_right_team[:, 0] - obs_ball[0] + right_minus_ball_y = obs_right_team[:, 1] - obs_ball[1] # right player - active - right_minus_active_x = (obs_right_team[:, 0][..., None] - obs_left_team[active][0]).repeat(11, axis=1).transpose(1, 0) - right_minus_active_y = (obs_right_team[:, 1][..., None] - obs_left_team[active][1]).repeat(11, axis=1).transpose(1, 0) + right_minus_active_x = obs_right_team[:, 0] - obs_left_team[active][0] + right_minus_active_y = obs_right_team[:, 1] - obs_left_team[active][1] # left player - side line - left_minus_side_top = np.abs(obs_left_team[:, 1][..., None] - side_line_y[0]).repeat(11, axis=1) - left_minus_side_bottom = np.abs(obs_left_team[:, 1][..., None] - side_line_y[1]).repeat(11, axis=1) + left_minus_side_top = np.abs(obs_left_team[:, 1] - side_line_y[0]) + left_minus_side_bottom = np.abs(obs_left_team[:, 1] - side_line_y[1]) # right player - side line - right_minus_side_top = np.abs(obs_right_team[:, 1][..., None] - side_line_y[0]).repeat(11, axis=1).transpose(1, 0) - right_minus_side_bottom = np.abs(obs_right_team[:, 1][..., None] - side_line_y[1]).repeat(11, axis=1).transpose(1, 0) + right_minus_side_top = np.abs(obs_right_team[:, 1] - side_line_y[0]) + right_minus_side_bottom = np.abs(obs_right_team[:, 1] - side_line_y[1]) # left players direction obs_left_team_direction = np.array(obs['left_team_direction']) - left_player_direction_x = np.repeat(obs_left_team_direction[:, 0][..., None], 11, axis=1) - left_player_direction_y = np.repeat(obs_left_team_direction[:, 1][..., None], 11, axis=1) + left_player_direction_x = obs_left_team_direction[:, 0] + left_player_direction_y = obs_left_team_direction[:, 1] # right players direction obs_right_team_direction = np.array(obs['right_team_direction']) - right_player_direction_x = np.repeat(obs_right_team_direction[:, 0][..., None], 11, axis=1).transpose(1, 0) - right_player_direction_y = np.repeat(obs_right_team_direction[:, 1][..., None], 11, axis=1).transpose(1, 0) + right_player_direction_x = obs_right_team_direction[:, 0] + right_player_direction_y = obs_right_team_direction[:, 1] # ball direction obs_ball_direction = np.array(obs['ball_direction']) - ball_direction_x = np.ones((11, 11)) * obs_ball_direction[0] - ball_direction_y = np.ones((11, 11)) * obs_ball_direction[1] - ball_direction_z = np.ones((11, 11)) * obs_ball_direction[2] + ball_direction_x = obs_ball_direction[0] + ball_direction_y = obs_ball_direction[1] + ball_direction_z = obs_ball_direction[2] # left players direction - right players direction left_minus_right_player_direction_x = obs_left_team_direction[:, 0][..., None] - obs_right_team_direction[:, 0] left_minus_right_player_direction_y = obs_left_team_direction[:, 1][..., None] - obs_right_team_direction[:, 1] # 
left players direction - ball direction - left_minus_ball_direction_x = (obs_left_team_direction[:, 0][..., None] - obs_ball_direction[0]).repeat(11, axis=1) - left_minus_ball_direction_y = (obs_left_team_direction[:, 1][..., None] - obs_ball_direction[1]).repeat(11, axis=1) + left_minus_ball_direction_x = obs_left_team_direction[:, 0] - obs_ball_direction[0] + left_minus_ball_direction_y = obs_left_team_direction[:, 1] - obs_ball_direction[1] # right players direction - ball direction - right_minus_ball_direction_x = (obs_right_team_direction[:, 0][..., None] - obs_ball_direction[0]).repeat(11, axis=1).transpose(1, 0) - right_minus_ball_direction_y = (obs_right_team_direction[:, 1][..., None] - obs_ball_direction[1]).repeat(11, axis=1).transpose(1, 0) + right_minus_ball_direction_x = obs_right_team_direction[:, 0] - obs_ball_direction[0] + right_minus_ball_direction_y = obs_right_team_direction[:, 1] - obs_ball_direction[1] # ball rotation obs_ball_rotation = np.array(obs['ball_rotation']) - ball_rotation_x = np.ones((11, 11)) * obs_ball_rotation[0] - ball_rotation_y = np.ones((11, 11)) * obs_ball_rotation[1] - ball_rotation_z = np.ones((11, 11)) * obs_ball_rotation[2] + ball_rotation_x = obs_ball_rotation[0] + ball_rotation_y = obs_ball_rotation[1] + ball_rotation_z = obs_ball_rotation[2] - cnn_feature = np.stack([ - left_player_x, - left_player_y, - right_player_x, - right_player_y, + cnn_scalar = np.stack([ + active_player_x, + active_player_y, ball_x, ball_y, ball_z, @@ -268,46 +272,57 @@ def feature_from_states(states, info, number): right_goal_y, side_line_y_top, side_line_y_bottom, - active_player_x, - active_player_y, - left_minus_right_player_x, - left_minus_right_player_y, + ball_direction_x, + ball_direction_y, + ball_direction_z, + ball_rotation_x, + ball_rotation_y, + ball_rotation_z, + ]).astype(np.float32) + + cnn_left = np.stack([ + left_player_x, + left_player_y, + left_minus_active_x, + left_minus_active_y, left_minus_right_goal_x, left_minus_right_goal_y, left_minus_left_goal_x, left_minus_left_goal_y, + left_minus_side_top, + left_minus_side_bottom, + left_minus_ball_x, + left_minus_ball_y, + left_minus_ball_direction_x, + left_minus_ball_direction_y, + ]).astype(np.float32) + + cnn_right = np.stack([ + right_player_x, + right_player_y, + right_minus_active_x, + right_minus_active_y, right_minus_right_goal_x, right_minus_right_goal_y, right_minus_left_goal_x, right_minus_left_goal_y, - left_minus_side_top, - left_minus_side_bottom, right_minus_side_top, right_minus_side_bottom, right_minus_ball_x, right_minus_ball_y, - right_minus_active_x, - right_minus_active_y, - left_minus_ball_x, - left_minus_ball_y, - left_minus_active_x, - left_minus_active_y, - ball_direction_x, - ball_direction_y, - ball_direction_z, - left_minus_ball_direction_x, - left_minus_ball_direction_y, right_minus_ball_direction_x, right_minus_ball_direction_y, left_player_direction_x, left_player_direction_y, right_player_direction_x, right_player_direction_y, + ]).astype(np.float32) + + cnn_2d = np.stack([ + left_minus_right_player_x, + left_minus_right_player_y, left_minus_right_player_direction_x, left_minus_right_player_direction_y, - ball_rotation_x, - ball_rotation_y, - ball_rotation_z, ], axis=0).astype(np.float32) # ball @@ -451,7 +466,12 @@ def multi_scale(x, scale): 'b2o': b2o_distance }, # CNN - 'cnn_feature': cnn_feature, + 'cnn_feature': { + 'scalar': cnn_scalar, + 'left': cnn_left, + 'right': cnn_right, + '2d': cnn_2d + }, 'action_history': action_history } From 
1c77cb0a5fa2607bd50e709f35ad2cfff1395279 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 23 Apr 2022 07:55:52 +0900 Subject: [PATCH 08/22] fix: cnn feature save memory --- handyrl/envs/gfootball.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 742b797d..6928ed43 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -295,6 +295,8 @@ def feature_from_states(states, info, number): left_minus_ball_y, left_minus_ball_direction_x, left_minus_ball_direction_y, + left_player_direction_x, + left_player_direction_y, ]).astype(np.float32) cnn_right = np.stack([ @@ -312,8 +314,6 @@ def feature_from_states(states, info, number): right_minus_ball_y, right_minus_ball_direction_x, right_minus_ball_direction_y, - left_player_direction_x, - left_player_direction_y, right_player_direction_x, right_player_direction_y, ]).astype(np.float32) From 0a5d6c35d813db9e5005e0a0b259fb2427ae7b30 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 23 Apr 2022 08:49:28 +0900 Subject: [PATCH 09/22] feature: reverse history --- handyrl/envs/gfootball.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 6928ed43..999d0437 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -111,12 +111,12 @@ def feature_from_states(states, info, number): HISTORY_LENGTH = 8 - obs_history_ = [s['observation'][number] for s in reversed(states[-HISTORY_LENGTH:])] - obs_history = obs_history_ + [obs_history_[-1]] * (HISTORY_LENGTH - len(obs_history_)) - obs = obs_history[0] + obs_history_ = [s['observation'][number] for s in states[-HISTORY_LENGTH:]] + obs_history = [obs_history_[0]] * (HISTORY_LENGTH - len(obs_history_)) + obs_history_ + obs = obs_history[-1] - action_history_ = [s['action'][number] for s in reversed(states[-HISTORY_LENGTH:])] - action_history = action_history_ + [0] * (HISTORY_LENGTH - len(action_history_ )) + action_history_ = [s['action'][number] for s in states[-HISTORY_LENGTH:]] + action_history = [0] * (HISTORY_LENGTH - len(action_history_ )) + action_history_ """ ・left players (x) From f72c390d4cf3b0ec1f6cd0d00d3bb22df3c90a0e Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 23 Apr 2022 16:09:25 +0900 Subject: [PATCH 10/22] experiment: render football output --- handyrl/envs/gfootball.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 999d0437..d7df6e48 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -18,15 +18,19 @@ def __init__(self, units0, units1): self.fc = nn.Linear(units0, units1) self.bn = nn.BatchNorm1d(units1) self.head_p = nn.Linear(units1, 19, bias=False) - self.head_v = nn.Linear(units1, 1, bias=False) + #self.head_v = nn.Linear(units1, 1, bias=False) self.head_r = nn.Linear(units1, 1, bias=False) def forward(self, x): h = F.relu_(self.bn(self.fc(x))) p = self.head_p(h) - v = self.head_v(h) + #v = self.head_v(h) r = self.head_r(h) - return {'policy': p, 'value': v, 'return': r} + return { + 'policy': p, + #'value': v, + 'return': r + } class CNNModel(nn.Module): def __init__(self, final_filters): @@ -109,7 +113,7 @@ def forward(self, x, hidden): def feature_from_states(states, info, number): # observation list to input tensor - HISTORY_LENGTH = 8 + HISTORY_LENGTH = 20 obs_history_ = [s['observation'][number] for s in states[-HISTORY_LENGTH:]] obs_history = 
[obs_history_[0]] * (HISTORY_LENGTH - len(obs_history_)) + obs_history_
@@ -549,7 +553,7 @@ class Environment(BaseEnvironment):
     def __init__(self, args=None):
         self.env = None
         args = args if args is not None else {}
-        self.limit_step = args.get('limit_step', 600)
+        self.limit_step = args.get('limit_step', 1000)
         self.controlled_players = 1
 
     def reset(self, args=None):
@@ -559,10 +563,14 @@ def reset(self, args=None):
         self.env = create_environment(
             env_name="11_vs_11_stochastic",
             representation='raw',
+            write_full_episode_dumps=True,
+            logdir='videos',
+            write_video=True,
             number_of_left_players_agent_controls=self.CONTROLLED_PLAYERS,
             number_of_right_players_agent_controls=self.CONTROLLED_PLAYERS,
-            other_config_options={'action_set': 'v2'})
+            other_config_options={'action_set': 'v2', 'video_quality_level': 2})
 
+        self.env.render()
         obs = self.env.reset()
         self.update({'observation': obs, 'action': [0] * self.CONTROLLED_PLAYERS * 2}, reset=True)
 
@@ -621,6 +629,8 @@ def reward(self):
         prev_score = self.prev_score
         score = self.score()
 
+        print(prev_score, score)
+
         rewards = {}
         for p in self.players():
             r = 1.0 * (score[p] - prev_score[p]) - 1.0 * (score[1 - p] - prev_score[1 - p])
@@ -702,5 +712,7 @@ def net(self):
         action_list[1] = 19
         print(len(e.states), action_list)
         e.step(action_list)
-        print(e.score())
+        print(e.reward())
+        if sum(e.score().values()) > 0:
+            print('goal!')
 print(e.outcome())

From 5455f15bb47912eee24eb0aa2576f742cc5657f3 Mon Sep 17 00:00:00 2001
From: YuriCat
Date: Sat, 23 Apr 2022 21:46:58 +0900
Subject: [PATCH 11/22] feature: update football environment

---
 handyrl/envs/gfootball.py | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py
index 999d0437..6ed6c147 100644
--- a/handyrl/envs/gfootball.py
+++ b/handyrl/envs/gfootball.py
@@ -109,7 +109,7 @@ def forward(self, x, hidden):
 def feature_from_states(states, info, number):
     # observation list to input tensor
 
-    HISTORY_LENGTH = 8
+    HISTORY_LENGTH = 20
 
@@ -323,7 +323,7 @@ def feature_from_states(states, info, number):
         left_minus_right_player_y,
         left_minus_right_player_direction_x,
         left_minus_right_player_direction_y,
-    ], axis=0).astype(np.float32)
+    ]).astype(np.float32)
 
     # ball
     BALL_OWEND_1HOT = {-1: [0, 0], 0: [1, 0], 1: [0, 1]}
@@ -448,23 +448,23 @@ def multi_scale(x, scale):
         # features
         'ball': ball_features,
         'match': match_features,
-        'player': {
-            'self': left_team_features,
-            'opp': right_team_features
-        },
+        #'player': {
+        #    'self': left_team_features,
+        #    'opp': right_team_features
+        #},
         'control': control_features,
-        'player_index': {
-            'self': left_team_indice,
-            'opp': right_team_indice
-        },
+        #'player_index': {
+        #    'self': left_team_indice,
+        #    'opp': right_team_indice
+        #},
         'mode_index': mode_index,
         'control_flag': control_flag,
         # distances
-        'distance': {
-            'p2p': p2p_distance,
-            'p2bo': p2bo_distance,
-            'b2o': b2o_distance
-        },
+        #'distance': {
+        #    'p2p': p2p_distance,
+        #    'p2bo': p2bo_distance,
+        #    'b2o': b2o_distance
+        #},
         # CNN
         'cnn_feature': {
             'scalar': cnn_scalar,
@@ -549,8 +549,7 @@ class Environment(BaseEnvironment):
     def __init__(self, args=None):
         self.env = None
         args = args if args is not None else {}
-        self.limit_step = args.get('limit_step', 600)
-        self.controlled_players = 1
+        self.limit_step = args.get('limit_step', 1000)
 
     def reset(self, 
args=None): if self.env is None: From 9860e07ffea88afd5e950943946fa056e603cec7 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 23 Apr 2022 21:50:06 +0900 Subject: [PATCH 12/22] feature: change render option --- handyrl/envs/gfootball.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 3917ac82..42ac63dc 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -567,7 +567,7 @@ def reset(self, args=None): write_video=True, number_of_left_players_agent_controls=self.CONTROLLED_PLAYERS, number_of_right_players_agent_controls=self.CONTROLLED_PLAYERS, - other_config_options={'action_set': 'v2', 'video_quality_level': 2}) + other_config_options={'action_set': 'v2'}) self.env.render() obs = self.env.reset() From 855ade724a4330cb69d518e7fb0d5ba79b172a67 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Wed, 11 May 2022 23:20:16 +0900 Subject: [PATCH 13/22] feature: 115+alpha feature, lstm net --- handyrl/envs/gfootball.py | 120 +++++++++++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 2 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 6ed6c147..5b972526 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -105,7 +105,120 @@ def forward(self, x, hidden): return o +class FootballRecurrentNet(nn.Module): + def __init__(self): + super().__init__() + units = 128 + + self.units = units + self.fc1 = nn.Linear(125, units) + self.fc2 = nn.Linear(units, units) + self.rnn_blocks = nn.ModuleList([nn.LSTMCell(units, units) for _ in range(4)]) + self.fc3 = nn.Linear(units, units) + self.fcp = nn.Linear(units, 19, bias=False) + self.fcv = nn.Linear(units, 1, bias=False) + self.fcr = nn.Linear(units, 1, bias=False) + + def init_hidden(self, batch_size): + return [(torch.zeros(*batch_size, self.units), + torch.zeros(*batch_size, self.units)) for _ in self.rnn_blocks] + + def forward(self, x, hidden): + h = x + h = F.relu_(self.fc1(h)) + h = F.relu_(self.fc2(h)) + next_hidden = [] + for block, hidden_ in zip(self.rnn_blocks, hidden): + h, c_ = block(h, hidden_) + next_hidden.append((h, c_)) + + h = F.relu_(h) + p = self.fcp(h) + v = self.fcv(h) + r = self.fcr(h) + + return {'policy': p, 'value': v, 'return': r, 'hidden': next_hidden} + + +# https://github.com/google-research/football/blob/12f93de031e7f7c105f32924d113b1f7e6d77349/gfootball/env/wrappers.py + +def convert_observation_115_plus_alpha(observation, fixed_positions): + """Converts an observation into simple115 (or simple115v2) format. + Args: + observation: observation that the environment returns + fixed_positions: Players and positions are always occupying 88 fields + (even if the game is played 1v1). + If True, the position of the player will be the same - no + matter how many players are on the field: + (so first 11 pairs will belong to the first team, even + if it has less players). + If False, then the position of players from team2 + will depend on number of players in team1). + Returns: + (N, 115) shaped representation, where N stands for the number of players + being controlled. 
+ """ + + def do_flatten(obj): + """Run flatten on either python list or numpy array.""" + if type(obj) == list: + return np.array(obj).flatten() + return obj.flatten() + + final_obs = [] + for obs in observation: + o = [] + if fixed_positions: + for i, name in enumerate(['left_team', 'left_team_direction', + 'right_team', 'right_team_direction']): + o.extend(do_flatten(obs[name])) + # If there were less than 11vs11 players we backfill missing values + # with -1. + if len(o) < (i + 1) * 22: + o.extend([-1] * ((i + 1) * 22 - len(o))) + else: + o.extend(do_flatten(obs['left_team'])) + o.extend(do_flatten(obs['left_team_direction'])) + o.extend(do_flatten(obs['right_team'])) + o.extend(do_flatten(obs['right_team_direction'])) + + # If there were less than 11vs11 players we backfill missing values with + # -1. + # 88 = 11 (players) * 2 (teams) * 2 (positions & directions) * 2 (x & y) + if len(o) < 88: + o.extend([-1] * (88 - len(o))) + + # ball position + o.extend(obs['ball']) + # ball direction + o.extend(obs['ball_direction']) + # one hot encoding of which team owns the ball + if obs['ball_owned_team'] == -1: + o.extend([1, 0, 0]) + if obs['ball_owned_team'] == 0: + o.extend([0, 1, 0]) + if obs['ball_owned_team'] == 1: + o.extend([0, 0, 1]) + + active = [0] * 11 + if obs['active'] != -1: + active[obs['active']] = 1 + o.extend(active) + + game_mode = [0] * 7 + game_mode[obs['game_mode']] = 1 + o.extend(game_mode) + + # sticky actions + o.extend(obs['sticky_actions']) + + final_obs.append(o) + + return np.array(final_obs, dtype=np.float32) + + # feature + def feature_from_states(states, info, number): # observation list to input tensor @@ -645,7 +758,9 @@ def raw_observation(self, player): def observation(self, player, number=0): # input feature for neural nets info = {'half_step': self.half_step} - return feature_from_states(self.states, info, player * self.CONTROLLED_PLAYERS + number) + index = player * self.CONTROLLED_PLAYERS + number + #return feature_from_states(self.states, info, ) + return convert_observation_115_plus_alpha(self.states[-1]['observation'], True)[index] def _preprocess_state(self, state): if state is None: @@ -683,7 +798,8 @@ def rule_based_action(self, player=None, number=0, key=None): return 5 def net(self): - return FootballNet() + #return FootballNet() + return FootballRecurrentNet() if __name__ == '__main__': From e70074738fb60db9a6e5eaa58627ff7815ba37c3 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Wed, 11 May 2022 23:47:06 +0900 Subject: [PATCH 14/22] fix: lstm net --- handyrl/envs/gfootball.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 5b972526..c930bf80 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -132,7 +132,7 @@ def forward(self, x, hidden): h, c_ = block(h, hidden_) next_hidden.append((h, c_)) - h = F.relu_(h) + h = F.relu_(self.fc3(h)) p = self.fcp(h) v = self.fcv(h) r = self.fcr(h) From 3ef775af5e01c31d6f0fc10fde4f8b8c58197696 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Thu, 12 May 2022 02:00:20 +0900 Subject: [PATCH 15/22] feature: subjective feature --- handyrl/envs/gfootball.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index c930bf80..eca7e6b9 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -111,7 +111,7 @@ def __init__(self): units = 128 self.units = units - self.fc1 = nn.Linear(125, units) + self.fc1 = nn.Linear(133, units) 
self.fc2 = nn.Linear(units, units) self.rnn_blocks = nn.ModuleList([nn.LSTMCell(units, units) for _ in range(4)]) self.fc3 = nn.Linear(units, units) @@ -212,6 +212,15 @@ def do_flatten(obj): # sticky actions o.extend(obs['sticky_actions']) + # subjective pose + if obs['active'] != -1: + o.extend(obs['left_team'][obs['active']]) + o.extend(obs['left_team_direction'][obs['active']]) + o.extend(obs['ball'][:2] - obs['left_team'][obs['active']]) + o.extend(obs['ball_direction'][:2] - obs['left_team_direction'][obs['active']]) + else: + o.extend([-1] * 8) + final_obs.append(o) return np.array(final_obs, dtype=np.float32) From d9b89670cfe1350383dddd10d5c1aefcf564b684 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Thu, 12 May 2022 09:27:15 +0900 Subject: [PATCH 16/22] fix: apply TanH to heads --- handyrl/envs/gfootball.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index eca7e6b9..a9046bbb 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -134,8 +134,8 @@ def forward(self, x, hidden): h = F.relu_(self.fc3(h)) p = self.fcp(h) - v = self.fcv(h) - r = self.fcr(h) + v = torch.tanh(self.fcv(h)) + r = torch.tanh(self.fcr(h)) return {'policy': p, 'value': v, 'return': r, 'hidden': next_hidden} From 13bc972d6858dd0760478e8fe6e3842379afbe97 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Thu, 12 May 2022 12:11:27 +0900 Subject: [PATCH 17/22] fix: apply tanh for v, r heads --- handyrl/envs/gfootball.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 6ed6c147..a6aecf76 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -24,8 +24,8 @@ def __init__(self, units0, units1): def forward(self, x): h = F.relu_(self.bn(self.fc(x))) p = self.head_p(h) - v = self.head_v(h) - r = self.head_r(h) + v = torch.tanh(self.head_v(h)) + r = torch.tanh(self.head_r(h)) return {'policy': p, 'value': v, 'return': r} class CNNModel(nn.Module): From 833660cc9f81e8eb044c6ba580731e1d0acd5bfe Mon Sep 17 00:00:00 2001 From: YuriCat Date: Thu, 12 May 2022 20:42:04 +0900 Subject: [PATCH 18/22] feature: show flags --- handyrl/envs/gfootball.py | 16 ++++++++++++---- handyrl/evaluation.py | 2 ++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index b45a194a..cd8d1e8f 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -674,20 +674,28 @@ def __init__(self, args=None): self.limit_step = args.get('limit_step', 1000) def reset(self, args=None): + if args is None: + args = {} + show = args.get('show', False) + if self.env is None: from gfootball.env import create_environment self.env = create_environment( env_name="11_vs_11_stochastic", representation='raw', - write_full_episode_dumps=True, + write_full_episode_dumps=show, logdir='videos', - write_video=True, + write_video=show, number_of_left_players_agent_controls=self.CONTROLLED_PLAYERS, number_of_right_players_agent_controls=self.CONTROLLED_PLAYERS, - other_config_options={'action_set': 'v2'}) + other_config_options={ + 'action_set': 'v2', + 'video_format': 'webm', + }) - self.env.render() + if show: + self.env.render() obs = self.env.reset() self.update({'observation': obs, 'action': [0] * self.CONTROLLED_PLAYERS * 2}, reset=True) diff --git a/handyrl/evaluation.py b/handyrl/evaluation.py index 248f5b6c..84ab4cd8 100755 --- a/handyrl/evaluation.py +++ b/handyrl/evaluation.py @@ -81,6 +81,7 @@ def 
observe(self, player): def exec_match(env, agents, critic=None, show=False, game_args={}): ''' match with shared game environment ''' + game_args['show'] = show if env.reset(game_args): return None for agent in agents.values(): @@ -110,6 +111,7 @@ def exec_match(env, agents, critic=None, show=False, game_args={}): def exec_network_match(env, network_agents, critic=None, show=False, game_args={}): ''' match with divided game environment ''' + game_args['show'] = show if env.reset(game_args): return None for p, agent in network_agents.items(): From 7abab58e93abc9b2ecb8b8dbb1096525b8e0cbac Mon Sep 17 00:00:00 2001 From: YuriCat Date: Thu, 12 May 2022 21:11:49 +0900 Subject: [PATCH 19/22] feature: render only in show mode --- handyrl/envs/gfootball.py | 7 +++++-- handyrl/evaluation.py | 17 +++++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index cd8d1e8f..18042f2a 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -108,7 +108,7 @@ def forward(self, x, hidden): class FootballRecurrentNet(nn.Module): def __init__(self): super().__init__() - units = 128 + units = 192 self.units = units self.fc1 = nn.Linear(133, units) @@ -744,6 +744,9 @@ def terminal(self): or len(self.states) > self.limit_step \ or (self.FINISH_BY_GOAL and sum(self.score().values()) > 0) + def view_transition(self): + print(self.states[-1]['action']) + def score(self): if len(self.states) == 0: return [0, 0] @@ -773,7 +776,7 @@ def outcome(self): def legal_actions(self, player, number=0): # legal action list - return list(range(self.ACTION_LEN)) + return [e for e in Action] def raw_observation(self, player): return self.states[-1]['observation'][player] diff --git a/handyrl/evaluation.py b/handyrl/evaluation.py index 84ab4cd8..1a425c63 100755 --- a/handyrl/evaluation.py +++ b/handyrl/evaluation.py @@ -380,14 +380,18 @@ def eval_main(args, argv): prepare_env(env_args) env = make_env(env_args) - model_path = argv[0] if len(argv) >= 1 else 'models/latest.pth' + model_paths = argv[0].split(':') if len(argv) >= 1 else ['models/latest.pth'] num_games = int(argv[1]) if len(argv) >= 2 else 100 num_process = int(argv[2]) if len(argv) >= 3 else 1 - agent1 = build_agent(model_path, env) - if agent1 is None: - model = load_model(model_path, env.net()) - agent1 = Agent(model) + def resolve_agent(model_path): + agent = build_agent(model_path, env) + if agent is None: + model = load_model(model_path, env.net()) + agent = Agent(model) + return agent + + main_agent = resolve_agent(model_paths[0]) critic = None print('%d process, %d games' % (num_process, num_games)) @@ -395,7 +399,8 @@ def eval_main(args, argv): seed = random.randrange(1e8) print('seed = %d' % seed) - agents = [agent1] + [RandomAgent() for _ in range(len(env.players()) - 1)] + opponent = model_paths[1] if len(model_paths) > 1 else 'random' + agents = [main_agent] + [resolve_agent(opponent) for _ in range(len(env.players()) - 1)] evaluate_mp(env, agents, critic, env_args, {'default': {}}, num_process, num_games, seed) From 1102e3c290112a4101a28cb7835e4488949591d8 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Thu, 12 May 2022 22:32:13 +0900 Subject: [PATCH 20/22] fix: remove debug print --- handyrl/envs/gfootball.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 18042f2a..8c7b8a2f 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -757,8 +757,6 @@ def reward(self): prev_score = 
self.prev_score score = self.score() - print(prev_score, score) - rewards = {} for p in self.players(): r = 1.0 * (score[p] - prev_score[p]) - 1.0 * (score[1 - p] - prev_score[1 - p]) From 2a7e657419644e11dff214e7bfc18d4572c0e1b7 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 14 May 2022 17:07:36 +0900 Subject: [PATCH 21/22] feature: make feature with index --- handyrl/envs/gfootball.py | 116 ++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 60 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 8c7b8a2f..29ecab99 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -142,7 +142,7 @@ def forward(self, x, hidden): # https://github.com/google-research/football/blob/12f93de031e7f7c105f32924d113b1f7e6d77349/gfootball/env/wrappers.py -def convert_observation_115_plus_alpha(observation, fixed_positions): +def convert_observation_115_plus_alpha(obs, num, fixed_positions): """Converts an observation into simple115 (or simple115v2) format. Args: observation: observation that the environment returns @@ -165,65 +165,61 @@ def do_flatten(obj): return np.array(obj).flatten() return obj.flatten() - final_obs = [] - for obs in observation: - o = [] - if fixed_positions: - for i, name in enumerate(['left_team', 'left_team_direction', - 'right_team', 'right_team_direction']): - o.extend(do_flatten(obs[name])) - # If there were less than 11vs11 players we backfill missing values - # with -1. - if len(o) < (i + 1) * 22: - o.extend([-1] * ((i + 1) * 22 - len(o))) - else: - o.extend(do_flatten(obs['left_team'])) - o.extend(do_flatten(obs['left_team_direction'])) - o.extend(do_flatten(obs['right_team'])) - o.extend(do_flatten(obs['right_team_direction'])) - - # If there were less than 11vs11 players we backfill missing values with - # -1. - # 88 = 11 (players) * 2 (teams) * 2 (positions & directions) * 2 (x & y) - if len(o) < 88: - o.extend([-1] * (88 - len(o))) - - # ball position - o.extend(obs['ball']) - # ball direction - o.extend(obs['ball_direction']) - # one hot encoding of which team owns the ball - if obs['ball_owned_team'] == -1: - o.extend([1, 0, 0]) - if obs['ball_owned_team'] == 0: - o.extend([0, 1, 0]) - if obs['ball_owned_team'] == 1: - o.extend([0, 0, 1]) - - active = [0] * 11 - if obs['active'] != -1: - active[obs['active']] = 1 - o.extend(active) - - game_mode = [0] * 7 - game_mode[obs['game_mode']] = 1 - o.extend(game_mode) - - # sticky actions - o.extend(obs['sticky_actions']) - - # subjective pose - if obs['active'] != -1: - o.extend(obs['left_team'][obs['active']]) - o.extend(obs['left_team_direction'][obs['active']]) - o.extend(obs['ball'][:2] - obs['left_team'][obs['active']]) - o.extend(obs['ball_direction'][:2] - obs['left_team_direction'][obs['active']]) - else: - o.extend([-1] * 8) - - final_obs.append(o) + o = [] + if fixed_positions: + for i, name in enumerate(['left_team', 'left_team_direction', + 'right_team', 'right_team_direction']): + o.extend(do_flatten(obs[name])) + # If there were less than 11vs11 players we backfill missing values + # with -1. + if len(o) < (i + 1) * 22: + o.extend([-1] * ((i + 1) * 22 - len(o))) + else: + o.extend(do_flatten(obs['left_team'])) + o.extend(do_flatten(obs['left_team_direction'])) + o.extend(do_flatten(obs['right_team'])) + o.extend(do_flatten(obs['right_team_direction'])) + + # If there were less than 11vs11 players we backfill missing values with + # -1. 
+ # 88 = 11 (players) * 2 (teams) * 2 (positions & directions) * 2 (x & y) + if len(o) < 88: + o.extend([-1] * (88 - len(o))) + + # ball position + o.extend(obs['ball']) + # ball direction + o.extend(obs['ball_direction']) + # one hot encoding of which team owns the ball + if obs['ball_owned_team'] == -1: + o.extend([1, 0, 0]) + if obs['ball_owned_team'] == 0: + o.extend([0, 1, 0]) + if obs['ball_owned_team'] == 1: + o.extend([0, 0, 1]) + + active = [0] * 11 + if obs['active'] != -1: + active[obs['active']] = 1 + o.extend(active) + + game_mode = [0] * 7 + game_mode[obs['game_mode']] = 1 + o.extend(game_mode) + + # sticky actions + o.extend(obs['sticky_actions']) + + # subjective pose + if obs['active'] != -1: + o.extend(obs['left_team'][obs['active']]) + o.extend(obs['left_team_direction'][obs['active']]) + o.extend(obs['ball'][:2] - obs['left_team'][obs['active']]) + o.extend(obs['ball_direction'][:2] - obs['left_team_direction'][obs['active']]) + else: + o.extend([-1] * 8) - return np.array(final_obs, dtype=np.float32) + return np.array(o, dtype=np.float32) # feature @@ -784,7 +780,7 @@ def observation(self, player, number=0): info = {'half_step': self.half_step} index = player * self.CONTROLLED_PLAYERS + number #return feature_from_states(self.states, info, ) - return convert_observation_115_plus_alpha(self.states[-1]['observation'], True)[index] + return convert_observation_115_plus_alpha(self.states[-1]['observation'][index], index, True) def _preprocess_state(self, state): if state is None: From 73e5405285eb241ad455fc7bf46da97cc551e0c9 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Fri, 20 May 2022 01:22:26 +0900 Subject: [PATCH 22/22] feature: update football environment --- handyrl/envs/gfootball.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 29ecab99..4ec71176 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -108,7 +108,7 @@ def forward(self, x, hidden): class FootballRecurrentNet(nn.Module): def __init__(self): super().__init__() - units = 192 + units = 256 self.units = units self.fc1 = nn.Linear(133, units) @@ -740,6 +740,9 @@ def terminal(self): or len(self.states) > self.limit_step \ or (self.FINISH_BY_GOAL and sum(self.score().values()) > 0) + def __str__(self): + return 'step ' + str(len(self.states)) + ' ' + str(list(self.score().values())) + def view_transition(self): print(self.states[-1]['action'])
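
Note on the feature size (an inference from the diffs above, not a statement by the author): FootballRecurrentNet keeps nn.Linear(133, units) as its input layer, and 133 is exactly the width of the "115 plus alpha" vector that convert_observation_115_plus_alpha() builds: the standard simple115 block (88 position/direction values, ball position and direction, ball-ownership one-hot, active-player one-hot, game-mode one-hot) plus the 10 sticky-action flags and the 8 "subjective pose" values added in this series. A minimal Python sketch of that accounting; the layout names are hypothetical, only the sizes come from the code:

    # Hypothetical slot names; sizes are read off the feature-building code above.
    SIMPLE115_LAYOUT = [
        ('positions_and_directions', 88),  # 11 players * 2 teams * 2 (pos, dir) * 2 (x, y)
        ('ball_position', 3),              # x, y, z
        ('ball_direction', 3),
        ('ball_ownership_one_hot', 3),     # none / left team / right team
        ('active_player_one_hot', 11),
        ('game_mode_one_hot', 7),
    ]
    EXTRA_LAYOUT = [
        ('sticky_actions', 10),
        ('subjective_pose', 8),            # active pos (2) + dir (2) + ball offset (2) + ball-dir offset (2)
    ]

    assert sum(n for _, n in SIMPLE115_LAYOUT) == 115
    # 115 + 10 + 8 = 133, matching nn.Linear(133, units) in FootballRecurrentNet.
    assert sum(n for _, n in SIMPLE115_LAYOUT + EXTRA_LAYOUT) == 133

Separately, with patch 19 applied, eval_main() splits argv[0] on ':', so a head-to-head evaluation of two checkpoints should look roughly like the following (assuming HandyRL's usual --eval entry point, here over 100 games with 4 processes; a single path keeps the old behavior of playing against random agents):

    python main.py --eval models/latest.pth:models/older.pth 100 4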