From 2630f04a4ce28bcc5f38a173bd1dffd39032ec6c Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 23 Apr 2022 05:43:45 +0900 Subject: [PATCH 01/22] feature: add google research football environment --- handyrl/envs/gfootball.py | 674 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 674 insertions(+) create mode 100644 handyrl/envs/gfootball.py diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py new file mode 100644 index 00000000..f7e1dca1 --- /dev/null +++ b/handyrl/envs/gfootball.py @@ -0,0 +1,674 @@ +import random +import copy + +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from handyrl.environment import BaseEnvironment + + + +class FootballNet(nn.Module): + class FootballHead(nn.Module): + def __init__(self, units0, units1): + super().__init__() + self.fc = nn.Linear(units0, units1) + self.bn = nn.BatchNorm1d(units1) + self.head_p = nn.Linear(units1, 19, bias=False) + self.head_v = nn.Linear(units1, 1, bias=False) + self.head_r = nn.Linear(units1, 1, bias=False) + + def forward(self, x): + h = F.relu_(self.bn(self.fc(x))) + p = self.head_p(h) + v = self.head_v(h) + r = self.head_r(h) + return {'policy': p, 'value': v, 'return': r} + + class CNNModel(nn.Module): + def __init__(self, final_filters): + super().__init__() + self.conv1 = nn.Sequential( + nn.Conv2d(53, 128, kernel_size=1, stride=1, bias=False), + nn.ReLU(inplace=True), + nn.Conv2d(128, 160, kernel_size=1, stride=1, bias=False), + nn.ReLU(inplace=True), + nn.Conv2d(160, 128, kernel_size=1, stride=1, bias=False), + nn.ReLU(inplace=True) + ) + self.pool1 = nn.AdaptiveAvgPool2d((1, 11)) + self.conv2 = nn.Sequential( + nn.BatchNorm2d(128), + nn.Conv2d(128, 160, kernel_size=(1, 1), stride=1, bias=False), + nn.ReLU(inplace=True), + nn.BatchNorm2d(160), + nn.Conv2d(160, 96, kernel_size=(1, 1), stride=1, bias=False), + nn.ReLU(inplace=True), + nn.BatchNorm2d(96), + nn.Conv2d(96, final_filters, kernel_size=(1, 1), stride=1, bias=False), + nn.ReLU(inplace=True), + nn.BatchNorm2d(final_filters), + ) + self.pool2 = nn.AdaptiveAvgPool2d((1, 1)) + self.flatten = nn.Flatten() + + def forward(self, x): + x = x['cnn_feature'] + x = self.conv1(x) + x = self.pool1(x) + x = self.conv2(x) + x = self.pool2(x) + x = self.flatten(x) + return x + + class ActionHistoryEncoder(nn.Module): + def __init__(self, hidden_size=64, num_layers=2): + super().__init__() + self.action_emd = nn.Embedding(19, 8) + self.rnn = nn.GRU(8, hidden_size, num_layers, batch_first=True) + + def forward(self, x): + h = self.action_emd(x['action_history']) + h = h.squeeze(dim=2) + self.rnn.flatten_parameters() + h, _ = self.rnn(h) + return h + + def __init__(self): + super().__init__() + + self.cnn = self.CNNModel(64) # to control + self.rnn = self.ActionHistoryEncoder(64, 2) + self.head = self.FootballHead(157, 64) + + def forward(self, x, hidden): + cnn_h = self.cnn(x) + rnn_h = self.rnn(x) + + h = torch.cat([ + cnn_h.view(cnn_h.size(0), -1), + rnn_h[:, -1, :], + x['ball'], + x['match'], + x['control']], -1) + o = self.head(h) + + return o + + +# feature +def feature_from_states(states, info, number): + # observation list to input tensor + + HISTORY_LENGTH = 8 + + obs_history_ = [s['observation'][number] for s in reversed(states[-HISTORY_LENGTH:])] + obs_history = obs_history_ + [obs_history_[-1]] * (HISTORY_LENGTH - len(obs_history_)) + obs = obs_history[0] + + action_history_ = [s['action'][number] for s in reversed(states[-HISTORY_LENGTH:])] + action_history = action_history_ + [0] * (HISTORY_LENGTH - 
len(action_history_ )) + + """ + ・left players (x) + ・left players (y) + ・right players (x) + ・right players (y) + ・ball (x) + ・ball (y) + ・left goal (x) + ・left goal (y) + ・right goal (x) + ・right goal (y) + ・active (x) + ・active (y) + + ・left players (x) - right players (x) + ・left players (y) - right players (y) + ・left players (x) - ball (x) + ・left players (y) - ball (y) + ・left players (x) - goal (x) + ・left players (y) - goal (y) + ・left players (x) - active (x) + ・left players (y) - active (y) + + ・left players direction (x) + ・left players direction (y) + ・right players direction (x) + ・right players direction (y) + ・left players direction (x) - right players direction (x) + ・left players direction (y) - right players direction (y) + """ + + # left players + obs_left_team = np.array(obs['left_team']) + left_player_x = np.repeat(obs_left_team[:, 0][..., None], 11, axis=1) + left_player_y = np.repeat(obs_left_team[:, 1][..., None], 11, axis=1) + + # right players + obs_right_team = np.array(obs['right_team']) + right_player_x = np.repeat(obs_right_team[:, 0][..., None], 11, axis=1).transpose(1, 0) + right_player_y = np.repeat(obs_right_team[:, 1][..., None], 11, axis=1).transpose(1, 0) + + # ball + obs_ball = np.array(obs['ball']) + ball_x = np.ones((11, 11)) * obs_ball[0] + ball_y = np.ones((11, 11)) * obs_ball[1] + ball_z = np.ones((11, 11)) * obs_ball[2] + + # goal + left_goal, right_goal = [-1, 0], [1, 0] + left_goal_x = np.ones((11, 11)) * left_goal[0] + left_goal_y = np.ones((11, 11)) * left_goal[1] + right_goal_x = np.ones((11, 11)) * right_goal[0] + right_goal_y = np.ones((11, 11)) * right_goal[1] + + # side line + side_line_y = [-.42, .42] + side_line_y_top = np.ones((11, 11)) * side_line_y[0] + side_line_y_bottom = np.ones((11, 11)) * side_line_y[1] + + # active + active = np.array(obs['active']) + active_player_x = np.repeat(obs_left_team[active][0][..., None, None], 11, axis=1).repeat(11, axis=0) + active_player_y = np.repeat(obs_left_team[active][1][..., None, None], 11, axis=1).repeat(11, axis=0) + + # left players - right players + left_minus_right_player_x = obs_left_team[:, 0][..., None] - obs_right_team[:, 0] + left_minus_right_player_y = obs_left_team[:, 1][..., None] - obs_right_team[:, 1] + + # left players - ball + left_minus_ball_x = (obs_left_team[:, 0][..., None] - obs_ball[0]).repeat(11, axis=1) + left_minus_ball_y = (obs_left_team[:, 1][..., None] - obs_ball[1]).repeat(11, axis=1) + + # left players - right goal + left_minus_right_goal_x = (obs_left_team[:, 0][..., None] - right_goal[0]).repeat(11, axis=1) + left_minus_right_goal_y = (obs_left_team[:, 1][..., None] - right_goal[1]).repeat(11, axis=1) + + # left players - left goal + left_minus_left_goal_x = (obs_left_team[:, 0][..., None] - left_goal[0]).repeat(11, axis=1) + left_minus_left_goal_y = (obs_left_team[:, 1][..., None] - left_goal[1]).repeat(11, axis=1) + + # right players - right goal + right_minus_right_goal_x = (obs_right_team[:, 0][..., None] - right_goal[0]).repeat(11, axis=1).transpose(1, 0) + right_minus_right_goal_y = (obs_right_team[:, 1][..., None] - right_goal[1]).repeat(11, axis=1).transpose(1, 0) + + # right players - left goal + right_minus_left_goal_x = (obs_right_team[:, 0][..., None] - left_goal[0]).repeat(11, axis=1).transpose(1, 0) + right_minus_left_goal_y = (obs_right_team[:, 1][..., None] - left_goal[1]).repeat(11, axis=1).transpose(1, 0) + + # left players (x) - active + left_minus_active_x = (obs_left_team[:, 0][..., None] - obs_left_team[active][0]).repeat(11, axis=1) + 
left_minus_active_y = (obs_left_team[:, 1][..., None] - obs_left_team[active][1]).repeat(11, axis=1) + + # right player - ball + right_minus_ball_x = (obs_right_team[:, 0][..., None] - obs_ball[0]).repeat(11, axis=1).transpose(1, 0) + right_minus_ball_y = (obs_right_team[:, 1][..., None] - obs_ball[1]).repeat(11, axis=1).transpose(1, 0) + + # right player - active + right_minus_active_x = (obs_right_team[:, 0][..., None] - obs_left_team[active][0]).repeat(11, axis=1).transpose(1, 0) + right_minus_active_y = (obs_right_team[:, 1][..., None] - obs_left_team[active][1]).repeat(11, axis=1).transpose(1, 0) + + # left player - side line + left_minus_side_top = np.abs(obs_left_team[:, 1][..., None] - side_line_y[0]).repeat(11, axis=1) + left_minus_side_bottom = np.abs(obs_left_team[:, 1][..., None] - side_line_y[1]).repeat(11, axis=1) + + # right player - side line + right_minus_side_top = np.abs(obs_right_team[:, 1][..., None] - side_line_y[0]).repeat(11, axis=1).transpose(1, 0) + right_minus_side_bottom = np.abs(obs_right_team[:, 1][..., None] - side_line_y[1]).repeat(11, axis=1).transpose(1, 0) + + # left players direction + obs_left_team_direction = np.array(obs['left_team_direction']) + left_player_direction_x = np.repeat(obs_left_team_direction[:, 0][..., None], 11, axis=1) + left_player_direction_y = np.repeat(obs_left_team_direction[:, 1][..., None], 11, axis=1) + + # right players direction + obs_right_team_direction = np.array(obs['right_team_direction']) + right_player_direction_x = np.repeat(obs_right_team_direction[:, 0][..., None], 11, axis=1).transpose(1, 0) + right_player_direction_y = np.repeat(obs_right_team_direction[:, 1][..., None], 11, axis=1).transpose(1, 0) + + # ball direction + obs_ball_direction = np.array(obs['ball_direction']) + ball_direction_x = np.ones((11, 11)) * obs_ball_direction[0] + ball_direction_y = np.ones((11, 11)) * obs_ball_direction[1] + ball_direction_z = np.ones((11, 11)) * obs_ball_direction[2] + + # left players direction - right players direction + left_minus_right_player_direction_x = obs_left_team_direction[:, 0][..., None] - obs_right_team_direction[:, 0] + left_minus_right_player_direction_y = obs_left_team_direction[:, 1][..., None] - obs_right_team_direction[:, 1] + + # left players direction - ball direction + left_minus_ball_direction_x = (obs_left_team_direction[:, 0][..., None] - obs_ball_direction[0]).repeat(11, axis=1) + left_minus_ball_direction_y = (obs_left_team_direction[:, 1][..., None] - obs_ball_direction[1]).repeat(11, axis=1) + + # right players direction - ball direction + right_minus_ball_direction_x = (obs_right_team_direction[:, 0][..., None] - obs_ball_direction[0]).repeat(11, axis=1).transpose(1, 0) + right_minus_ball_direction_y = (obs_right_team_direction[:, 1][..., None] - obs_ball_direction[1]).repeat(11, axis=1).transpose(1, 0) + + # ball rotation + obs_ball_rotation = np.array(obs['ball_rotation']) + ball_rotation_x = np.ones((11, 11)) * obs_ball_rotation[0] + ball_rotation_y = np.ones((11, 11)) * obs_ball_rotation[1] + ball_rotation_z = np.ones((11, 11)) * obs_ball_rotation[2] + + cnn_feature = np.stack([ + left_player_x, + left_player_y, + right_player_x, + right_player_y, + ball_x, + ball_y, + ball_z, + left_goal_x, + left_goal_y, + right_goal_x, + right_goal_y, + side_line_y_top, + side_line_y_bottom, + active_player_x, + active_player_y, + left_minus_right_player_x, + left_minus_right_player_y, + left_minus_right_goal_x, + left_minus_right_goal_y, + left_minus_left_goal_x, + left_minus_left_goal_y, + 
right_minus_right_goal_x, + right_minus_right_goal_y, + right_minus_left_goal_x, + right_minus_left_goal_y, + left_minus_side_top, + left_minus_side_bottom, + right_minus_side_top, + right_minus_side_bottom, + right_minus_ball_x, + right_minus_ball_y, + right_minus_active_x, + right_minus_active_y, + left_minus_ball_x, + left_minus_ball_y, + left_minus_active_x, + left_minus_active_y, + ball_direction_x, + ball_direction_y, + ball_direction_z, + left_minus_ball_direction_x, + left_minus_ball_direction_y, + right_minus_ball_direction_x, + right_minus_ball_direction_y, + left_player_direction_x, + left_player_direction_y, + right_player_direction_x, + right_player_direction_y, + left_minus_right_player_direction_x, + left_minus_right_player_direction_y, + ball_rotation_x, + ball_rotation_y, + ball_rotation_z, + ], axis=0).astype(np.float32) + + # ball + BALL_OWEND_1HOT = {-1: [0, 0], 0: [1, 0], 1: [0, 1]} + ball_owned_team_ = obs['ball_owned_team'] + ball_owned_team = BALL_OWEND_1HOT[ball_owned_team_] # {-1, 0, 1} None, self, opponent + PLAYER_1HOT = np.concatenate([np.eye(11), np.zeros((1, 11))]) + ball_owned_player_ = PLAYER_1HOT[obs['ball_owned_player']] # {-1, N-1} + if ball_owned_team_ == -1: + my_ball_owned_player = PLAYER_1HOT[-1] + op_ball_owned_player = PLAYER_1HOT[-1] + elif ball_owned_team_ == 0: + my_ball_owned_player = ball_owned_player_ + op_ball_owned_player = PLAYER_1HOT[-1] + else: + my_ball_owned_player = PLAYER_1HOT[-1] + op_ball_owned_player = ball_owned_player_ + + ball_features = np.concatenate([ + obs['ball'], + obs['ball_direction'], + obs['ball_rotation'] + ]).astype(np.float32) + + # self team + left_team_features = np.concatenate([ + [[1] for _ in obs['left_team']], # left team flag + obs['left_team'], # position + obs['left_team_direction'], + [[v] for v in obs['left_team_tired_factor']], + [[v] for v in obs['left_team_yellow_card']], + [[v] for v in obs['left_team_active']], + my_ball_owned_player[...,np.newaxis] + ], axis=1).astype(np.float32) + + left_team_indice = np.arange(0, 11, dtype=np.int32) + + # opponent team + right_team_features = np.concatenate([ + [[0] for _ in obs['right_team']], # right team flag + obs['right_team'], # position + obs['right_team_direction'], + [[v] for v in obs['right_team_tired_factor']], + [[v] for v in obs['right_team_yellow_card']], + [[v] for v in obs['right_team_active']], + op_ball_owned_player[...,np.newaxis] + ], axis=1).astype(np.float32) + + right_team_indice = np.arange(0, 11, dtype=np.int32) + + # distance information + def get_distance(xy1, xy2): + return (((xy1 - xy2) ** 2).sum(axis=-1)) ** 0.5 + + def get_line_distance(x1, x2): + return np.abs(x1 - x2) + + def multi_scale(x, scale): + return 2 / (1 + np.exp(-np.array(x)[..., np.newaxis] / np.array(scale))) + + both_team = np.array(obs['left_team'] + obs['right_team'], dtype=np.float32) + ball = np.array([obs['ball'][:2]], dtype=np.float32) + goal = np.array([[-1, 0], [1, 0]], dtype=np.float32) + goal_line_x = np.array([-1, 1], dtype=np.float32) + side_line_y = np.array([-.42, .42], dtype=np.float32) + + # ball <-> goal, goal line, side line distance + b2g_distance = get_distance(ball, goal) + b2gl_distance = get_line_distance(ball[0][0], goal_line_x) + b2sl_distance = get_line_distance(ball[0][1], side_line_y) + b2o_distance = np.concatenate([ + b2g_distance, b2gl_distance, b2sl_distance + ], axis=-1) + + # player <-> ball, goal, back line, side line distance + p2b_distance = get_distance(both_team[:,np.newaxis,:], ball[np.newaxis,:,:]) + p2g_distance = 
get_distance(both_team[:,np.newaxis,:], goal[np.newaxis,:,:]) + p2gl_distance = get_line_distance(both_team[:,:1], goal_line_x[np.newaxis,:]) + p2sl_distance = get_line_distance(both_team[:,1:], side_line_y[np.newaxis,:]) + p2bo_distance = np.concatenate([ + p2b_distance, p2g_distance, p2gl_distance, p2sl_distance + ], axis=-1) + + # player <-> player distance + p2p_distance = get_distance(both_team[:,np.newaxis,:], both_team[np.newaxis,:,:]) + + # controlled player information + control_flag_ = np.array(PLAYER_1HOT[obs['active']], dtype=np.float32) + control_flag = np.concatenate([control_flag_, np.zeros(len(obs['right_team']), dtype=np.float32)])[...,np.newaxis] + + # controlled status information + DIR = [ + [-1, 0], [-.707, -.707], [0, 1], [ .707, -.707], # L, TL, T, TR + [ 1, 0], [ .707, .707], [0, -1], [-.707, .707] # R, BR, B, BL + ] + + sticky_direction = DIR[np.where(obs['sticky_actions'][:8] == 1)[0][0]] if 1 in obs['sticky_actions'][:8] else [0, 0] + sticky_flags = obs['sticky_actions'][8:] + + control_features = np.concatenate([ + sticky_direction, + sticky_flags, + ]).astype(np.float32) + + # Match state + if obs['steps_left'] > info['half_step']: + steps_left_half = obs['steps_left'] - info['half_step'] + else: + steps_left_half = obs['steps_left'] + match_features = np.concatenate([ + multi_scale(obs['score'], [1, 3]).ravel(), + multi_scale(obs['score'][0] - obs['score'][1], [1, 3]), + multi_scale(obs['steps_left'], [10, 100, 1000, 10000]), + multi_scale(steps_left_half, [10, 100, 1000, 10000]), + ball_owned_team, + ]).astype(np.float32) + + mode_index = np.array([obs['game_mode']], dtype=np.int32) + + action_history = np.array(action_history, dtype=np.int32)[..., None] + + return { + # features + 'ball': ball_features, + 'match': match_features, + 'player': { + 'self': left_team_features, + 'opp': right_team_features + }, + 'control': control_features, + 'player_index': { + 'self': left_team_indice, + 'opp': right_team_indice + }, + 'mode_index': mode_index, + 'control_flag': control_flag, + # distances + 'distance': { + 'p2p': p2p_distance, + 'p2bo': p2bo_distance, + 'b2o': b2o_distance + }, + # CNN + 'cnn_feature': cnn_feature, + 'action_history': action_history + } + + +# https://github.com/Kaggle/kaggle-environments/blob/master/kaggle_environments/envs/football/helpers.py + +import enum + +class Action(enum.IntEnum): + Idle = 0 + Left = 1 + TopLeft = 2 + Top = 3 + TopRight = 4 + Right = 5 + BottomRight = 6 + Bottom = 7 + BottomLeft = 8 + LongPass= 9 + HighPass = 10 + ShortPass = 11 + Shot = 12 + Sprint = 13 + ReleaseDirection = 14 + ReleaseSprint = 15 + Slide = 16 + Dribble = 17 + ReleaseDribble = 18 + +sticky_index_to_action = [ + Action.Left, + Action.TopLeft, + Action.Top, + Action.TopRight, + Action.Right, + Action.BottomRight, + Action.Bottom, + Action.BottomLeft, + Action.Sprint, + Action.Dribble +] + +action_to_sticky_index = { + a: index for index, a in enumerate(sticky_index_to_action) +} + +class PlayerRole(enum.IntEnum): + GoalKeeper = 0 + CenterBack = 1 + LeftBack = 2 + RightBack = 3 + DefenceMidfield = 4 + CentralMidfield = 5 + LeftMidfield = 6 + RIghtMidfield = 7 + AttackMidfield = 8 + CentralFront = 9 + + +class GameMode(enum.IntEnum): + Normal = 0 + KickOff = 1 + GoalKick = 2 + FreeKick = 3 + Corner = 4 + ThrowIn = 5 + Penalty = 6 + + +class Environment(BaseEnvironment): + ACTION_LEN = 19 + CONTROLLED_PLAYERS = 1 + + def __init__(self, args=None): + self.env = None + args = args if args is not None else {} + self.limit_step = args.get('limit_step', 600) 
+ self.controlled_players = 1 + + def reset(self, args=None): + if self.env is None: + from gfootball.env import create_environment + + self.env = create_environment( + env_name="11_vs_11_stochastic", + representation='raw', + number_of_left_players_agent_controls=self.CONTROLLED_PLAYERS, + number_of_right_players_agent_controls=self.CONTROLLED_PLAYERS) + + obs = self.env.reset() + self.update({'observation': obs, 'action': [0] * self.CONTROLLED_PLAYERS * 2}, reset=True) + + def update(self, state, reset): + if reset: + self.done = False + self.prev_score = [0, 0] + self.states = [] + self.half_step = 1500 + self.reserved_action = [None, None] + else: + self.prev_score = self.score() + + state = copy.deepcopy(state) + state = self._preprocess_state(state) + self.states.append(state) + + if reset: + self.half_step = state['observation'][0]['steps_left'] // 2 + + def step(self, actions): + # state transition function + # action is integer (0 ~ 18) + actions = copy.deepcopy(actions) + for i, res_action in enumerate(self.reserved_action): + if res_action is not None: + actions[i] = res_action + + # step environment + flat_actions = [actions[0], actions[1]] + obs, _, self.done, _ = self.env.step(flat_actions) + self.update({'observation': obs, 'action': flat_actions}, reset=False) + + def diff_info(self): + return self.states[-1] + + def turns(self): + return self.players() + + def players(self): + return [0, 1] + + def terminal(self): + # check whether the state is terminal + return self.done \ + or len(self.states) > self.limit_step \ + or sum(self.score().values()) > 0 # finish after first goal + + def score(self): + if len(self.states) == 0: + return [0, 0] + state = self.states[-1] + return {p: state['observation'][0]['score'][p] for p in self.players()} + + def reward(self): + prev_score = self.prev_score + score = self.score() + + rewards = {} + for p in self.players(): + r = 1.0 * (score[p] - prev_score[p]) - 1.0 * (score[1 - p] - prev_score[1 - p]) + rewards[p] = r + + return rewards + + def outcome(self): + scores = self.score() + if scores[0] > scores[1]: + return [1, -1] + elif scores[0] < scores[1]: + return [-1, 1] + return [0, 0] + + def legal_actions(self, player, number=0): + # legal action list + return list(range(self.ACTION_LEN)) + + def raw_observation(self, player): + return self.states[-1]['observation'][player] + + def observation(self, player, number=0): + # input feature for neural nets + info = {'half_step': self.half_step} + return feature_from_states(self.states, info, player * self.CONTROLLED_PLAYERS + number) + + def _preprocess_state(self, state): + if state is None: + return state + + # in ball-dead state, set ball owned player and team + for o in state['observation']: + mode = o['game_mode'] + if mode == GameMode.FreeKick or \ + mode == GameMode.Corner or \ + mode == GameMode.Penalty or \ + mode == GameMode.GoalKick: + # find nearest player and team + def dist(xy1, xy2): + return ((xy1[0] - xy2[0]) ** 2 + (xy1[1] - xy2[1]) ** 2) ** 0.5 + team_player_position = [(0, i, p) for i, p in enumerate(o['left_team'])] + \ + [(1, i, p) for i, p in enumerate(o['right_team'])] + distances = [(t[0], t[1], dist(t[2], o['ball'][:2])) for t in team_player_position] + distances = sorted(distances, key=lambda x: x[2]) + + o['ball_owned_team'] = distances[0][0] + o['ball_owned_player'] = distances[0][1] + + return state + + def net(self): + return FootballNet() + + +if __name__ == '__main__': + e = Environment() + for _ in range(1): + e.reset() + o = e.observation(0) + while not 
e.terminal():
+            # print(e)
+            _ = e.observation(0)
+            _ = e.observation(1)
+            #print(e.raw_observation(0)[0]['steps_left'])
+            action_list = [0, 0]
+            action_list[0] = random.choice(e.legal_actions(0))
+            action_list[1] = random.choice(e.legal_actions(1))
+            print(len(e.states), action_list)
+            e.step(action_list)
+        print(e.reward())
+        print(e.score())
+        print(e.outcome())

From adf111fa8a21a725f5210427dc0ccf5274736aee Mon Sep 17 00:00:00 2001
From: YuriCat
Date: Sat, 23 Apr 2022 06:05:18 +0900
Subject: [PATCH 02/22] feature: set action set=v2

---
 handyrl/envs/gfootball.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py
index f7e1dca1..67e73d9d 100644
--- a/handyrl/envs/gfootball.py
+++ b/handyrl/envs/gfootball.py
@@ -524,6 +524,7 @@ class GameMode(enum.IntEnum):
 class Environment(BaseEnvironment):
     ACTION_LEN = 19
     CONTROLLED_PLAYERS = 1
+    FINISH_BY_GOAL = True
 
     def __init__(self, args=None):
         self.env = None
@@ -539,7 +540,8 @@ def reset(self, args=None):
             env_name="11_vs_11_stochastic",
             representation='raw',
             number_of_left_players_agent_controls=self.CONTROLLED_PLAYERS,
-            number_of_right_players_agent_controls=self.CONTROLLED_PLAYERS)
+            number_of_right_players_agent_controls=self.CONTROLLED_PLAYERS,
+            other_config_options={'action_set': 'v2'})
 
         obs = self.env.reset()
         self.update({'observation': obs, 'action': [0] * self.CONTROLLED_PLAYERS * 2}, reset=True)
@@ -587,7 +589,7 @@ def terminal(self):
         # check whether the state is terminal
         return self.done \
             or len(self.states) > self.limit_step \
-            or sum(self.score().values()) > 0 # finish after first goal
+            or (self.FINISH_BY_GOAL and sum(self.score().values()) > 0)
 
     def score(self):
         if len(self.states) == 0:
@@ -609,10 +611,10 @@ def reward(self):
     def outcome(self):
         scores = self.score()
         if scores[0] > scores[1]:
-            return [1, -1]
+            return {0: 1, 1: -1}
         elif scores[0] < scores[1]:
-            return [-1, 1]
-        return [0, 0]
+            return {0: -1, 1: 1}
+        return {0: 0, 1: 0}
 
     def legal_actions(self, player, number=0):
         # legal action list
@@ -666,9 +668,8 @@ def net(self):
         #print(e.raw_observation(0)[0]['steps_left'])
         action_list = [0, 0]
         action_list[0] = random.choice(e.legal_actions(0))
-        action_list[1] = random.choice(e.legal_actions(1))
+        action_list[1] = 19
         print(len(e.states), action_list)
         e.step(action_list)
-        print(e.reward())
-        print(e.score())
+        print(e.score())
 print(e.outcome())

From 2aaf70a22babea7a01d44b9d1b1b817a85e825c4 Mon Sep 17 00:00:00 2001
From: YuriCat
Date: Sat, 23 Apr 2022 06:34:50 +0900
Subject: [PATCH 03/22] feature: add rulebase action (builtin ai)

---
 handyrl/envs/gfootball.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py
index 67e73d9d..140799f0 100644
--- a/handyrl/envs/gfootball.py
+++ b/handyrl/envs/gfootball.py
@@ -652,6 +652,9 @@ def dist(xy1, xy2):
 
         return state
 
+    def rule_based_action(self, player=None, number=0):
+        return 19
+
     def net(self):
         return FootballNet()
 

From 473b047c919c3c99a04ed7ce88dc2202513ea68f Mon Sep 17 00:00:00 2001
From: YuriCat
Date: Sat, 23 Apr 2022 06:57:36 +0900
Subject: [PATCH 04/22] feature: rulebase agents in football environment

---
 handyrl/agent.py          |  5 ++++-
 handyrl/envs/gfootball.py | 12 ++++++++++--
 handyrl/evaluation.py     |  5 +++--
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/handyrl/agent.py b/handyrl/agent.py
index cbb5c961..71a2dde6 100755
--- a/handyrl/agent.py
+++ b/handyrl/agent.py
@@ -23,9 +23,12 @@ def observe(self, env, player, show=False):
class RuleBasedAgent(RandomAgent): + def __init__(self, key=None): + self.key = None + def action(self, env, player, show=False): if hasattr(env, 'rule_based_action'): - return env.rule_based_action(player) + return env.rule_based_action(player, key=self.key) else: return random.choice(env.legal_actions(player)) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 140799f0..8f3f0881 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -652,8 +652,16 @@ def dist(xy1, xy2): return state - def rule_based_action(self, player=None, number=0): - return 19 + def rule_based_action(self, player=None, number=0, key=None): + if key is None: + key = 'builtin_ai' + + if key == 'builtin_ai': + return 19 + elif key == 'idle': + return 14 + elif key == 'right': + return 5 def net(self): return FootballNet() diff --git a/handyrl/evaluation.py b/handyrl/evaluation.py index ad770e30..4bb28c16 100755 --- a/handyrl/evaluation.py +++ b/handyrl/evaluation.py @@ -143,8 +143,9 @@ def exec_network_match(env, network_agents, critic=None, show=False, game_args={ def build_agent(raw, env=None): if raw == 'random': return RandomAgent() - elif raw == 'rulebase': - return RuleBasedAgent() + elif raw.startswith('rulebase'): + key = rulebase.split('-')[1] if '-' in raw else None + return RuleBasedAgent(key) return None From 4e01997f43f28dfef5324559ef341ccec319559c Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 23 Apr 2022 06:59:08 +0900 Subject: [PATCH 05/22] fix: rulebase agent key --- handyrl/evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handyrl/evaluation.py b/handyrl/evaluation.py index 4bb28c16..720ea5f9 100755 --- a/handyrl/evaluation.py +++ b/handyrl/evaluation.py @@ -144,7 +144,7 @@ def build_agent(raw, env=None): if raw == 'random': return RandomAgent() elif raw.startswith('rulebase'): - key = rulebase.split('-')[1] if '-' in raw else None + key = raw.split('-')[1] if '-' in raw else None return RuleBasedAgent(key) return None From 306f20c482942433de7499b416039f694a8f4f57 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 23 Apr 2022 07:06:49 +0900 Subject: [PATCH 06/22] fix: rulebase key --- handyrl/agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handyrl/agent.py b/handyrl/agent.py index 71a2dde6..c2e0b78f 100755 --- a/handyrl/agent.py +++ b/handyrl/agent.py @@ -24,7 +24,7 @@ def observe(self, env, player, show=False): class RuleBasedAgent(RandomAgent): def __init__(self, key=None): - self.key = None + self.key = key def action(self, env, player, show=False): if hasattr(env, 'rule_based_action'): From 44a72621da04e3fba863272f640ebea3699b9905 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 23 Apr 2022 07:54:35 +0900 Subject: [PATCH 07/22] feature: cnn feature save memory --- handyrl/envs/gfootball.py | 170 +++++++++++++++++++++----------------- 1 file changed, 95 insertions(+), 75 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 8f3f0881..742b797d 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -57,6 +57,12 @@ def __init__(self, final_filters): def forward(self, x): x = x['cnn_feature'] + x = torch.cat([ + x['2d'], + x['left'].unsqueeze(-1).repeat(1, 1, 1, 11), + x['right'].unsqueeze(-2).repeat(1, 1, 11, 1), + x['scalar'].unsqueeze(-1).unsqueeze(-1).repeat(1, 1, 11, 11), + ], 1) x = self.conv1(x) x = self.pool1(x) x = self.conv2(x) @@ -145,120 +151,118 @@ def feature_from_states(states, info, number): # left players obs_left_team = 
np.array(obs['left_team']) - left_player_x = np.repeat(obs_left_team[:, 0][..., None], 11, axis=1) - left_player_y = np.repeat(obs_left_team[:, 1][..., None], 11, axis=1) + left_player_x = obs_left_team[:, 0] + left_player_y = obs_left_team[:, 1] # right players obs_right_team = np.array(obs['right_team']) - right_player_x = np.repeat(obs_right_team[:, 0][..., None], 11, axis=1).transpose(1, 0) - right_player_y = np.repeat(obs_right_team[:, 1][..., None], 11, axis=1).transpose(1, 0) + right_player_x = obs_right_team[:, 0] + right_player_y = obs_right_team[:, 1] # ball obs_ball = np.array(obs['ball']) - ball_x = np.ones((11, 11)) * obs_ball[0] - ball_y = np.ones((11, 11)) * obs_ball[1] - ball_z = np.ones((11, 11)) * obs_ball[2] + ball_x = obs_ball[0] + ball_y = obs_ball[1] + ball_z = obs_ball[2] # goal left_goal, right_goal = [-1, 0], [1, 0] - left_goal_x = np.ones((11, 11)) * left_goal[0] - left_goal_y = np.ones((11, 11)) * left_goal[1] - right_goal_x = np.ones((11, 11)) * right_goal[0] - right_goal_y = np.ones((11, 11)) * right_goal[1] + left_goal_x = left_goal[0] + left_goal_y = left_goal[1] + right_goal_x = right_goal[0] + right_goal_y = right_goal[1] # side line side_line_y = [-.42, .42] - side_line_y_top = np.ones((11, 11)) * side_line_y[0] - side_line_y_bottom = np.ones((11, 11)) * side_line_y[1] + side_line_y_top = side_line_y[0] + side_line_y_bottom = side_line_y[1] # active active = np.array(obs['active']) - active_player_x = np.repeat(obs_left_team[active][0][..., None, None], 11, axis=1).repeat(11, axis=0) - active_player_y = np.repeat(obs_left_team[active][1][..., None, None], 11, axis=1).repeat(11, axis=0) + active_player_x = obs_left_team[active][0] + active_player_y = obs_left_team[active][1] # left players - right players left_minus_right_player_x = obs_left_team[:, 0][..., None] - obs_right_team[:, 0] left_minus_right_player_y = obs_left_team[:, 1][..., None] - obs_right_team[:, 1] # left players - ball - left_minus_ball_x = (obs_left_team[:, 0][..., None] - obs_ball[0]).repeat(11, axis=1) - left_minus_ball_y = (obs_left_team[:, 1][..., None] - obs_ball[1]).repeat(11, axis=1) + left_minus_ball_x = obs_left_team[:, 0] - obs_ball[0] + left_minus_ball_y = obs_left_team[:, 1] - obs_ball[1] # left players - right goal - left_minus_right_goal_x = (obs_left_team[:, 0][..., None] - right_goal[0]).repeat(11, axis=1) - left_minus_right_goal_y = (obs_left_team[:, 1][..., None] - right_goal[1]).repeat(11, axis=1) + left_minus_right_goal_x = obs_left_team[:, 0] - right_goal[0] + left_minus_right_goal_y = obs_left_team[:, 1] - right_goal[1] # left players - left goal - left_minus_left_goal_x = (obs_left_team[:, 0][..., None] - left_goal[0]).repeat(11, axis=1) - left_minus_left_goal_y = (obs_left_team[:, 1][..., None] - left_goal[1]).repeat(11, axis=1) + left_minus_left_goal_x = obs_left_team[:, 0] - left_goal[0] + left_minus_left_goal_y = obs_left_team[:, 1] - left_goal[1] # right players - right goal - right_minus_right_goal_x = (obs_right_team[:, 0][..., None] - right_goal[0]).repeat(11, axis=1).transpose(1, 0) - right_minus_right_goal_y = (obs_right_team[:, 1][..., None] - right_goal[1]).repeat(11, axis=1).transpose(1, 0) + right_minus_right_goal_x = obs_right_team[:, 0] - right_goal[0] + right_minus_right_goal_y = obs_right_team[:, 1] - right_goal[1] # right players - left goal - right_minus_left_goal_x = (obs_right_team[:, 0][..., None] - left_goal[0]).repeat(11, axis=1).transpose(1, 0) - right_minus_left_goal_y = (obs_right_team[:, 1][..., None] - left_goal[1]).repeat(11, 
axis=1).transpose(1, 0) + right_minus_left_goal_x = obs_right_team[:, 0] - left_goal[0] + right_minus_left_goal_y = obs_right_team[:, 1] - left_goal[1] # left players (x) - active - left_minus_active_x = (obs_left_team[:, 0][..., None] - obs_left_team[active][0]).repeat(11, axis=1) - left_minus_active_y = (obs_left_team[:, 1][..., None] - obs_left_team[active][1]).repeat(11, axis=1) + left_minus_active_x = obs_left_team[:, 0] - obs_left_team[active][0] + left_minus_active_y = obs_left_team[:, 1] - obs_left_team[active][1] # right player - ball - right_minus_ball_x = (obs_right_team[:, 0][..., None] - obs_ball[0]).repeat(11, axis=1).transpose(1, 0) - right_minus_ball_y = (obs_right_team[:, 1][..., None] - obs_ball[1]).repeat(11, axis=1).transpose(1, 0) + right_minus_ball_x = obs_right_team[:, 0] - obs_ball[0] + right_minus_ball_y = obs_right_team[:, 1] - obs_ball[1] # right player - active - right_minus_active_x = (obs_right_team[:, 0][..., None] - obs_left_team[active][0]).repeat(11, axis=1).transpose(1, 0) - right_minus_active_y = (obs_right_team[:, 1][..., None] - obs_left_team[active][1]).repeat(11, axis=1).transpose(1, 0) + right_minus_active_x = obs_right_team[:, 0] - obs_left_team[active][0] + right_minus_active_y = obs_right_team[:, 1] - obs_left_team[active][1] # left player - side line - left_minus_side_top = np.abs(obs_left_team[:, 1][..., None] - side_line_y[0]).repeat(11, axis=1) - left_minus_side_bottom = np.abs(obs_left_team[:, 1][..., None] - side_line_y[1]).repeat(11, axis=1) + left_minus_side_top = np.abs(obs_left_team[:, 1] - side_line_y[0]) + left_minus_side_bottom = np.abs(obs_left_team[:, 1] - side_line_y[1]) # right player - side line - right_minus_side_top = np.abs(obs_right_team[:, 1][..., None] - side_line_y[0]).repeat(11, axis=1).transpose(1, 0) - right_minus_side_bottom = np.abs(obs_right_team[:, 1][..., None] - side_line_y[1]).repeat(11, axis=1).transpose(1, 0) + right_minus_side_top = np.abs(obs_right_team[:, 1] - side_line_y[0]) + right_minus_side_bottom = np.abs(obs_right_team[:, 1] - side_line_y[1]) # left players direction obs_left_team_direction = np.array(obs['left_team_direction']) - left_player_direction_x = np.repeat(obs_left_team_direction[:, 0][..., None], 11, axis=1) - left_player_direction_y = np.repeat(obs_left_team_direction[:, 1][..., None], 11, axis=1) + left_player_direction_x = obs_left_team_direction[:, 0] + left_player_direction_y = obs_left_team_direction[:, 1] # right players direction obs_right_team_direction = np.array(obs['right_team_direction']) - right_player_direction_x = np.repeat(obs_right_team_direction[:, 0][..., None], 11, axis=1).transpose(1, 0) - right_player_direction_y = np.repeat(obs_right_team_direction[:, 1][..., None], 11, axis=1).transpose(1, 0) + right_player_direction_x = obs_right_team_direction[:, 0] + right_player_direction_y = obs_right_team_direction[:, 1] # ball direction obs_ball_direction = np.array(obs['ball_direction']) - ball_direction_x = np.ones((11, 11)) * obs_ball_direction[0] - ball_direction_y = np.ones((11, 11)) * obs_ball_direction[1] - ball_direction_z = np.ones((11, 11)) * obs_ball_direction[2] + ball_direction_x = obs_ball_direction[0] + ball_direction_y = obs_ball_direction[1] + ball_direction_z = obs_ball_direction[2] # left players direction - right players direction left_minus_right_player_direction_x = obs_left_team_direction[:, 0][..., None] - obs_right_team_direction[:, 0] left_minus_right_player_direction_y = obs_left_team_direction[:, 1][..., None] - obs_right_team_direction[:, 1] # 
left players direction - ball direction - left_minus_ball_direction_x = (obs_left_team_direction[:, 0][..., None] - obs_ball_direction[0]).repeat(11, axis=1) - left_minus_ball_direction_y = (obs_left_team_direction[:, 1][..., None] - obs_ball_direction[1]).repeat(11, axis=1) + left_minus_ball_direction_x = obs_left_team_direction[:, 0] - obs_ball_direction[0] + left_minus_ball_direction_y = obs_left_team_direction[:, 1] - obs_ball_direction[1] # right players direction - ball direction - right_minus_ball_direction_x = (obs_right_team_direction[:, 0][..., None] - obs_ball_direction[0]).repeat(11, axis=1).transpose(1, 0) - right_minus_ball_direction_y = (obs_right_team_direction[:, 1][..., None] - obs_ball_direction[1]).repeat(11, axis=1).transpose(1, 0) + right_minus_ball_direction_x = obs_right_team_direction[:, 0] - obs_ball_direction[0] + right_minus_ball_direction_y = obs_right_team_direction[:, 1] - obs_ball_direction[1] # ball rotation obs_ball_rotation = np.array(obs['ball_rotation']) - ball_rotation_x = np.ones((11, 11)) * obs_ball_rotation[0] - ball_rotation_y = np.ones((11, 11)) * obs_ball_rotation[1] - ball_rotation_z = np.ones((11, 11)) * obs_ball_rotation[2] + ball_rotation_x = obs_ball_rotation[0] + ball_rotation_y = obs_ball_rotation[1] + ball_rotation_z = obs_ball_rotation[2] - cnn_feature = np.stack([ - left_player_x, - left_player_y, - right_player_x, - right_player_y, + cnn_scalar = np.stack([ + active_player_x, + active_player_y, ball_x, ball_y, ball_z, @@ -268,46 +272,57 @@ def feature_from_states(states, info, number): right_goal_y, side_line_y_top, side_line_y_bottom, - active_player_x, - active_player_y, - left_minus_right_player_x, - left_minus_right_player_y, + ball_direction_x, + ball_direction_y, + ball_direction_z, + ball_rotation_x, + ball_rotation_y, + ball_rotation_z, + ]).astype(np.float32) + + cnn_left = np.stack([ + left_player_x, + left_player_y, + left_minus_active_x, + left_minus_active_y, left_minus_right_goal_x, left_minus_right_goal_y, left_minus_left_goal_x, left_minus_left_goal_y, + left_minus_side_top, + left_minus_side_bottom, + left_minus_ball_x, + left_minus_ball_y, + left_minus_ball_direction_x, + left_minus_ball_direction_y, + ]).astype(np.float32) + + cnn_right = np.stack([ + right_player_x, + right_player_y, + right_minus_active_x, + right_minus_active_y, right_minus_right_goal_x, right_minus_right_goal_y, right_minus_left_goal_x, right_minus_left_goal_y, - left_minus_side_top, - left_minus_side_bottom, right_minus_side_top, right_minus_side_bottom, right_minus_ball_x, right_minus_ball_y, - right_minus_active_x, - right_minus_active_y, - left_minus_ball_x, - left_minus_ball_y, - left_minus_active_x, - left_minus_active_y, - ball_direction_x, - ball_direction_y, - ball_direction_z, - left_minus_ball_direction_x, - left_minus_ball_direction_y, right_minus_ball_direction_x, right_minus_ball_direction_y, left_player_direction_x, left_player_direction_y, right_player_direction_x, right_player_direction_y, + ]).astype(np.float32) + + cnn_2d = np.stack([ + left_minus_right_player_x, + left_minus_right_player_y, left_minus_right_player_direction_x, left_minus_right_player_direction_y, - ball_rotation_x, - ball_rotation_y, - ball_rotation_z, ], axis=0).astype(np.float32) # ball @@ -451,7 +466,12 @@ def multi_scale(x, scale): 'b2o': b2o_distance }, # CNN - 'cnn_feature': cnn_feature, + 'cnn_feature': { + 'scalar': cnn_scalar, + 'left': cnn_left, + 'right': cnn_right, + '2d': cnn_2d + }, 'action_history': action_history } From 
1c77cb0a5fa2607bd50e709f35ad2cfff1395279 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 23 Apr 2022 07:55:52 +0900 Subject: [PATCH 08/22] fix: cnn feature save memory --- handyrl/envs/gfootball.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 742b797d..6928ed43 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -295,6 +295,8 @@ def feature_from_states(states, info, number): left_minus_ball_y, left_minus_ball_direction_x, left_minus_ball_direction_y, + left_player_direction_x, + left_player_direction_y, ]).astype(np.float32) cnn_right = np.stack([ @@ -312,8 +314,6 @@ def feature_from_states(states, info, number): right_minus_ball_y, right_minus_ball_direction_x, right_minus_ball_direction_y, - left_player_direction_x, - left_player_direction_y, right_player_direction_x, right_player_direction_y, ]).astype(np.float32) From 0a5d6c35d813db9e5005e0a0b259fb2427ae7b30 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 23 Apr 2022 08:49:28 +0900 Subject: [PATCH 09/22] feature: reverse history --- handyrl/envs/gfootball.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 6928ed43..999d0437 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -111,12 +111,12 @@ def feature_from_states(states, info, number): HISTORY_LENGTH = 8 - obs_history_ = [s['observation'][number] for s in reversed(states[-HISTORY_LENGTH:])] - obs_history = obs_history_ + [obs_history_[-1]] * (HISTORY_LENGTH - len(obs_history_)) - obs = obs_history[0] + obs_history_ = [s['observation'][number] for s in states[-HISTORY_LENGTH:]] + obs_history = [obs_history_[0]] * (HISTORY_LENGTH - len(obs_history_)) + obs_history_ + obs = obs_history[-1] - action_history_ = [s['action'][number] for s in reversed(states[-HISTORY_LENGTH:])] - action_history = action_history_ + [0] * (HISTORY_LENGTH - len(action_history_ )) + action_history_ = [s['action'][number] for s in states[-HISTORY_LENGTH:]] + action_history = [0] * (HISTORY_LENGTH - len(action_history_ )) + action_history_ """ ・left players (x) From f72c390d4cf3b0ec1f6cd0d00d3bb22df3c90a0e Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 23 Apr 2022 16:09:25 +0900 Subject: [PATCH 10/22] experiment: render football output --- handyrl/envs/gfootball.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 999d0437..d7df6e48 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -18,15 +18,19 @@ def __init__(self, units0, units1): self.fc = nn.Linear(units0, units1) self.bn = nn.BatchNorm1d(units1) self.head_p = nn.Linear(units1, 19, bias=False) - self.head_v = nn.Linear(units1, 1, bias=False) + #self.head_v = nn.Linear(units1, 1, bias=False) self.head_r = nn.Linear(units1, 1, bias=False) def forward(self, x): h = F.relu_(self.bn(self.fc(x))) p = self.head_p(h) - v = self.head_v(h) + #v = self.head_v(h) r = self.head_r(h) - return {'policy': p, 'value': v, 'return': r} + return { + 'policy': p, + #'value': v, + 'return': r + } class CNNModel(nn.Module): def __init__(self, final_filters): @@ -109,7 +113,7 @@ def forward(self, x, hidden): def feature_from_states(states, info, number): # observation list to input tensor - HISTORY_LENGTH = 8 + HISTORY_LENGTH = 20 obs_history_ = [s['observation'][number] for s in states[-HISTORY_LENGTH:]] obs_history = 
[obs_history_[0]] * (HISTORY_LENGTH - len(obs_history_)) + obs_history_
@@ -549,7 +553,7 @@ class Environment(BaseEnvironment):
     def __init__(self, args=None):
         self.env = None
         args = args if args is not None else {}
-        self.limit_step = args.get('limit_step', 600)
+        self.limit_step = args.get('limit_step', 1000)
         self.controlled_players = 1
 
     def reset(self, args=None):
@@ -559,10 +563,14 @@ def reset(self, args=None):
         self.env = create_environment(
             env_name="11_vs_11_stochastic",
             representation='raw',
+            write_full_episode_dumps=True,
+            logdir='videos',
+            write_video=True,
             number_of_left_players_agent_controls=self.CONTROLLED_PLAYERS,
             number_of_right_players_agent_controls=self.CONTROLLED_PLAYERS,
-            other_config_options={'action_set': 'v2'})
+            other_config_options={'action_set': 'v2', 'video_quality_level': 2})
 
+        self.env.render()
         obs = self.env.reset()
         self.update({'observation': obs, 'action': [0] * self.CONTROLLED_PLAYERS * 2}, reset=True)
 
@@ -621,6 +629,8 @@ def reward(self):
         prev_score = self.prev_score
         score = self.score()
 
+        print(prev_score, score)
+
         rewards = {}
         for p in self.players():
             r = 1.0 * (score[p] - prev_score[p]) - 1.0 * (score[1 - p] - prev_score[1 - p])
@@ -702,5 +712,7 @@ def net(self):
         action_list[1] = 19
         print(len(e.states), action_list)
         e.step(action_list)
-        print(e.score())
+        print(e.reward())
+        if sum(e.score().values()) > 0:
+            print('goal!')
 print(e.outcome())

From 5455f15bb47912eee24eb0aa2576f742cc5657f3 Mon Sep 17 00:00:00 2001
From: YuriCat
Date: Sat, 23 Apr 2022 21:46:58 +0900
Subject: [PATCH 11/22] feature: update football environment

---
 handyrl/envs/gfootball.py | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py
index 999d0437..6ed6c147 100644
--- a/handyrl/envs/gfootball.py
+++ b/handyrl/envs/gfootball.py
@@ -109,7 +109,7 @@ def forward(self, x, hidden):
 def feature_from_states(states, info, number):
     # observation list to input tensor
 
-    HISTORY_LENGTH = 8
+    HISTORY_LENGTH = 20
 
@@ -323,7 +323,7 @@ def feature_from_states(states, info, number):
         left_minus_right_player_y,
         left_minus_right_player_direction_x,
         left_minus_right_player_direction_y,
-    ], axis=0).astype(np.float32)
+    ]).astype(np.float32)
 
     # ball
     BALL_OWEND_1HOT = {-1: [0, 0], 0: [1, 0], 1: [0, 1]}
@@ -448,23 +448,23 @@ def multi_scale(x, scale):
         # features
         'ball': ball_features,
         'match': match_features,
-        'player': {
-            'self': left_team_features,
-            'opp': right_team_features
-        },
+        #'player': {
+        #    'self': left_team_features,
+        #    'opp': right_team_features
+        #},
         'control': control_features,
-        'player_index': {
-            'self': left_team_indice,
-            'opp': right_team_indice
-        },
+        #'player_index': {
+        #    'self': left_team_indice,
+        #    'opp': right_team_indice
+        #},
         'mode_index': mode_index,
         'control_flag': control_flag,
         # distances
-        'distance': {
-            'p2p': p2p_distance,
-            'p2bo': p2bo_distance,
-            'b2o': b2o_distance
-        },
+        #'distance': {
+        #    'p2p': p2p_distance,
+        #    'p2bo': p2bo_distance,
+        #    'b2o': b2o_distance
+        #},
         # CNN
         'cnn_feature': {
             'scalar': cnn_scalar,
@@ -549,8 +549,7 @@ class Environment(BaseEnvironment):
     def __init__(self, args=None):
         self.env = None
         args = args if args is not None else {}
-        self.limit_step = args.get('limit_step', 600)
-        self.controlled_players = 1
+        self.limit_step = args.get('limit_step', 1000)
 
     def reset(self, 
args=None): if self.env is None: From 9860e07ffea88afd5e950943946fa056e603cec7 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 23 Apr 2022 21:50:06 +0900 Subject: [PATCH 12/22] feature: change render option --- handyrl/envs/gfootball.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 3917ac82..42ac63dc 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -567,7 +567,7 @@ def reset(self, args=None): write_video=True, number_of_left_players_agent_controls=self.CONTROLLED_PLAYERS, number_of_right_players_agent_controls=self.CONTROLLED_PLAYERS, - other_config_options={'action_set': 'v2', 'video_quality_level': 2}) + other_config_options={'action_set': 'v2'}) self.env.render() obs = self.env.reset() From 855ade724a4330cb69d518e7fb0d5ba79b172a67 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Wed, 11 May 2022 23:20:16 +0900 Subject: [PATCH 13/22] feature: 115+alpha feature, lstm net --- handyrl/envs/gfootball.py | 120 +++++++++++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 2 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 6ed6c147..5b972526 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -105,7 +105,120 @@ def forward(self, x, hidden): return o +class FootballRecurrentNet(nn.Module): + def __init__(self): + super().__init__() + units = 128 + + self.units = units + self.fc1 = nn.Linear(125, units) + self.fc2 = nn.Linear(units, units) + self.rnn_blocks = nn.ModuleList([nn.LSTMCell(units, units) for _ in range(4)]) + self.fc3 = nn.Linear(units, units) + self.fcp = nn.Linear(units, 19, bias=False) + self.fcv = nn.Linear(units, 1, bias=False) + self.fcr = nn.Linear(units, 1, bias=False) + + def init_hidden(self, batch_size): + return [(torch.zeros(*batch_size, self.units), + torch.zeros(*batch_size, self.units)) for _ in self.rnn_blocks] + + def forward(self, x, hidden): + h = x + h = F.relu_(self.fc1(h)) + h = F.relu_(self.fc2(h)) + next_hidden = [] + for block, hidden_ in zip(self.rnn_blocks, hidden): + h, c_ = block(h, hidden_) + next_hidden.append((h, c_)) + + h = F.relu_(h) + p = self.fcp(h) + v = self.fcv(h) + r = self.fcr(h) + + return {'policy': p, 'value': v, 'return': r, 'hidden': next_hidden} + + +# https://github.com/google-research/football/blob/12f93de031e7f7c105f32924d113b1f7e6d77349/gfootball/env/wrappers.py + +def convert_observation_115_plus_alpha(observation, fixed_positions): + """Converts an observation into simple115 (or simple115v2) format. + Args: + observation: observation that the environment returns + fixed_positions: Players and positions are always occupying 88 fields + (even if the game is played 1v1). + If True, the position of the player will be the same - no + matter how many players are on the field: + (so first 11 pairs will belong to the first team, even + if it has less players). + If False, then the position of players from team2 + will depend on number of players in team1). + Returns: + (N, 115) shaped representation, where N stands for the number of players + being controlled. 
+ """ + + def do_flatten(obj): + """Run flatten on either python list or numpy array.""" + if type(obj) == list: + return np.array(obj).flatten() + return obj.flatten() + + final_obs = [] + for obs in observation: + o = [] + if fixed_positions: + for i, name in enumerate(['left_team', 'left_team_direction', + 'right_team', 'right_team_direction']): + o.extend(do_flatten(obs[name])) + # If there were less than 11vs11 players we backfill missing values + # with -1. + if len(o) < (i + 1) * 22: + o.extend([-1] * ((i + 1) * 22 - len(o))) + else: + o.extend(do_flatten(obs['left_team'])) + o.extend(do_flatten(obs['left_team_direction'])) + o.extend(do_flatten(obs['right_team'])) + o.extend(do_flatten(obs['right_team_direction'])) + + # If there were less than 11vs11 players we backfill missing values with + # -1. + # 88 = 11 (players) * 2 (teams) * 2 (positions & directions) * 2 (x & y) + if len(o) < 88: + o.extend([-1] * (88 - len(o))) + + # ball position + o.extend(obs['ball']) + # ball direction + o.extend(obs['ball_direction']) + # one hot encoding of which team owns the ball + if obs['ball_owned_team'] == -1: + o.extend([1, 0, 0]) + if obs['ball_owned_team'] == 0: + o.extend([0, 1, 0]) + if obs['ball_owned_team'] == 1: + o.extend([0, 0, 1]) + + active = [0] * 11 + if obs['active'] != -1: + active[obs['active']] = 1 + o.extend(active) + + game_mode = [0] * 7 + game_mode[obs['game_mode']] = 1 + o.extend(game_mode) + + # sticky actions + o.extend(obs['sticky_actions']) + + final_obs.append(o) + + return np.array(final_obs, dtype=np.float32) + + # feature + def feature_from_states(states, info, number): # observation list to input tensor @@ -645,7 +758,9 @@ def raw_observation(self, player): def observation(self, player, number=0): # input feature for neural nets info = {'half_step': self.half_step} - return feature_from_states(self.states, info, player * self.CONTROLLED_PLAYERS + number) + index = player * self.CONTROLLED_PLAYERS + number + #return feature_from_states(self.states, info, ) + return convert_observation_115_plus_alpha(self.states[-1]['observation'], True)[index] def _preprocess_state(self, state): if state is None: @@ -683,7 +798,8 @@ def rule_based_action(self, player=None, number=0, key=None): return 5 def net(self): - return FootballNet() + #return FootballNet() + return FootballRecurrentNet() if __name__ == '__main__': From e70074738fb60db9a6e5eaa58627ff7815ba37c3 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Wed, 11 May 2022 23:47:06 +0900 Subject: [PATCH 14/22] fix: lstm net --- handyrl/envs/gfootball.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 5b972526..c930bf80 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -132,7 +132,7 @@ def forward(self, x, hidden): h, c_ = block(h, hidden_) next_hidden.append((h, c_)) - h = F.relu_(h) + h = F.relu_(self.fc3(h)) p = self.fcp(h) v = self.fcv(h) r = self.fcr(h) From 3ef775af5e01c31d6f0fc10fde4f8b8c58197696 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Thu, 12 May 2022 02:00:20 +0900 Subject: [PATCH 15/22] feature: subjective feature --- handyrl/envs/gfootball.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index c930bf80..eca7e6b9 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -111,7 +111,7 @@ def __init__(self): units = 128 self.units = units - self.fc1 = nn.Linear(125, units) + self.fc1 = nn.Linear(133, units) 
self.fc2 = nn.Linear(units, units) self.rnn_blocks = nn.ModuleList([nn.LSTMCell(units, units) for _ in range(4)]) self.fc3 = nn.Linear(units, units) @@ -212,6 +212,15 @@ def do_flatten(obj): # sticky actions o.extend(obs['sticky_actions']) + # subjective pose + if obs['active'] != -1: + o.extend(obs['left_team'][obs['active']]) + o.extend(obs['left_team_direction'][obs['active']]) + o.extend(obs['ball'][:2] - obs['left_team'][obs['active']]) + o.extend(obs['ball_direction'][:2] - obs['left_team_direction'][obs['active']]) + else: + o.extend([-1] * 8) + final_obs.append(o) return np.array(final_obs, dtype=np.float32) From d9b89670cfe1350383dddd10d5c1aefcf564b684 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Thu, 12 May 2022 09:27:15 +0900 Subject: [PATCH 16/22] fix: apply TanH to heads --- handyrl/envs/gfootball.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index eca7e6b9..a9046bbb 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -134,8 +134,8 @@ def forward(self, x, hidden): h = F.relu_(self.fc3(h)) p = self.fcp(h) - v = self.fcv(h) - r = self.fcr(h) + v = torch.tanh(self.fcv(h)) + r = torch.tanh(self.fcr(h)) return {'policy': p, 'value': v, 'return': r, 'hidden': next_hidden} From 13bc972d6858dd0760478e8fe6e3842379afbe97 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Thu, 12 May 2022 12:11:27 +0900 Subject: [PATCH 17/22] fix: apply tanh for v, r heads --- handyrl/envs/gfootball.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 6ed6c147..a6aecf76 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -24,8 +24,8 @@ def __init__(self, units0, units1): def forward(self, x): h = F.relu_(self.bn(self.fc(x))) p = self.head_p(h) - v = self.head_v(h) - r = self.head_r(h) + v = torch.tanh(self.head_v(h)) + r = torch.tanh(self.head_r(h)) return {'policy': p, 'value': v, 'return': r} class CNNModel(nn.Module): From 833660cc9f81e8eb044c6ba580731e1d0acd5bfe Mon Sep 17 00:00:00 2001 From: YuriCat Date: Thu, 12 May 2022 20:42:04 +0900 Subject: [PATCH 18/22] feature: show flags --- handyrl/envs/gfootball.py | 16 ++++++++++++---- handyrl/evaluation.py | 2 ++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index b45a194a..cd8d1e8f 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -674,20 +674,28 @@ def __init__(self, args=None): self.limit_step = args.get('limit_step', 1000) def reset(self, args=None): + if args is None: + args = {} + show = args.get('show', False) + if self.env is None: from gfootball.env import create_environment self.env = create_environment( env_name="11_vs_11_stochastic", representation='raw', - write_full_episode_dumps=True, + write_full_episode_dumps=show, logdir='videos', - write_video=True, + write_video=show, number_of_left_players_agent_controls=self.CONTROLLED_PLAYERS, number_of_right_players_agent_controls=self.CONTROLLED_PLAYERS, - other_config_options={'action_set': 'v2'}) + other_config_options={ + 'action_set': 'v2', + 'video_format': 'webm', + }) - self.env.render() + if show: + self.env.render() obs = self.env.reset() self.update({'observation': obs, 'action': [0] * self.CONTROLLED_PLAYERS * 2}, reset=True) diff --git a/handyrl/evaluation.py b/handyrl/evaluation.py index 248f5b6c..84ab4cd8 100755 --- a/handyrl/evaluation.py +++ b/handyrl/evaluation.py @@ -81,6 +81,7 @@ def 
observe(self, player): def exec_match(env, agents, critic=None, show=False, game_args={}): ''' match with shared game environment ''' + game_args['show'] = show if env.reset(game_args): return None for agent in agents.values(): @@ -110,6 +111,7 @@ def exec_match(env, agents, critic=None, show=False, game_args={}): def exec_network_match(env, network_agents, critic=None, show=False, game_args={}): ''' match with divided game environment ''' + game_args['show'] = show if env.reset(game_args): return None for p, agent in network_agents.items(): From 7abab58e93abc9b2ecb8b8dbb1096525b8e0cbac Mon Sep 17 00:00:00 2001 From: YuriCat Date: Thu, 12 May 2022 21:11:49 +0900 Subject: [PATCH 19/22] feature: render only in show mode --- handyrl/envs/gfootball.py | 7 +++++-- handyrl/evaluation.py | 17 +++++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index cd8d1e8f..18042f2a 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -108,7 +108,7 @@ def forward(self, x, hidden): class FootballRecurrentNet(nn.Module): def __init__(self): super().__init__() - units = 128 + units = 192 self.units = units self.fc1 = nn.Linear(133, units) @@ -744,6 +744,9 @@ def terminal(self): or len(self.states) > self.limit_step \ or (self.FINISH_BY_GOAL and sum(self.score().values()) > 0) + def view_transition(self): + print(self.states[-1]['action']) + def score(self): if len(self.states) == 0: return [0, 0] @@ -773,7 +776,7 @@ def outcome(self): def legal_actions(self, player, number=0): # legal action list - return list(range(self.ACTION_LEN)) + return [e for e in Action] def raw_observation(self, player): return self.states[-1]['observation'][player] diff --git a/handyrl/evaluation.py b/handyrl/evaluation.py index 84ab4cd8..1a425c63 100755 --- a/handyrl/evaluation.py +++ b/handyrl/evaluation.py @@ -380,14 +380,18 @@ def eval_main(args, argv): prepare_env(env_args) env = make_env(env_args) - model_path = argv[0] if len(argv) >= 1 else 'models/latest.pth' + model_paths = argv[0].split(':') if len(argv) >= 1 else ['models/latest.pth'] num_games = int(argv[1]) if len(argv) >= 2 else 100 num_process = int(argv[2]) if len(argv) >= 3 else 1 - agent1 = build_agent(model_path, env) - if agent1 is None: - model = load_model(model_path, env.net()) - agent1 = Agent(model) + def resolve_agent(model_path): + agent = build_agent(model_path, env) + if agent is None: + model = load_model(model_path, env.net()) + agent = Agent(model) + return agent + + main_agent = resolve_agent(model_paths[0]) critic = None print('%d process, %d games' % (num_process, num_games)) @@ -395,7 +399,8 @@ def eval_main(args, argv): seed = random.randrange(1e8) print('seed = %d' % seed) - agents = [agent1] + [RandomAgent() for _ in range(len(env.players()) - 1)] + opponent = model_paths[1] if len(model_paths) > 1 else 'random' + agents = [main_agent] + [resolve_agent(opponent) for _ in range(len(env.players()) - 1)] evaluate_mp(env, agents, critic, env_args, {'default': {}}, num_process, num_games, seed) From 1102e3c290112a4101a28cb7835e4488949591d8 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Thu, 12 May 2022 22:32:13 +0900 Subject: [PATCH 20/22] fix: remove debug print --- handyrl/envs/gfootball.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 18042f2a..8c7b8a2f 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -757,8 +757,6 @@ def reward(self): prev_score = 
self.prev_score score = self.score() - print(prev_score, score) - rewards = {} for p in self.players(): r = 1.0 * (score[p] - prev_score[p]) - 1.0 * (score[1 - p] - prev_score[1 - p]) From 2a7e657419644e11dff214e7bfc18d4572c0e1b7 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Sat, 14 May 2022 17:07:36 +0900 Subject: [PATCH 21/22] feature: make feature with index --- handyrl/envs/gfootball.py | 116 ++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 60 deletions(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 8c7b8a2f..29ecab99 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -142,7 +142,7 @@ def forward(self, x, hidden): # https://github.com/google-research/football/blob/12f93de031e7f7c105f32924d113b1f7e6d77349/gfootball/env/wrappers.py -def convert_observation_115_plus_alpha(observation, fixed_positions): +def convert_observation_115_plus_alpha(obs, num, fixed_positions): """Converts an observation into simple115 (or simple115v2) format. Args: observation: observation that the environment returns @@ -165,65 +165,61 @@ def do_flatten(obj): return np.array(obj).flatten() return obj.flatten() - final_obs = [] - for obs in observation: - o = [] - if fixed_positions: - for i, name in enumerate(['left_team', 'left_team_direction', - 'right_team', 'right_team_direction']): - o.extend(do_flatten(obs[name])) - # If there were less than 11vs11 players we backfill missing values - # with -1. - if len(o) < (i + 1) * 22: - o.extend([-1] * ((i + 1) * 22 - len(o))) - else: - o.extend(do_flatten(obs['left_team'])) - o.extend(do_flatten(obs['left_team_direction'])) - o.extend(do_flatten(obs['right_team'])) - o.extend(do_flatten(obs['right_team_direction'])) - - # If there were less than 11vs11 players we backfill missing values with - # -1. - # 88 = 11 (players) * 2 (teams) * 2 (positions & directions) * 2 (x & y) - if len(o) < 88: - o.extend([-1] * (88 - len(o))) - - # ball position - o.extend(obs['ball']) - # ball direction - o.extend(obs['ball_direction']) - # one hot encoding of which team owns the ball - if obs['ball_owned_team'] == -1: - o.extend([1, 0, 0]) - if obs['ball_owned_team'] == 0: - o.extend([0, 1, 0]) - if obs['ball_owned_team'] == 1: - o.extend([0, 0, 1]) - - active = [0] * 11 - if obs['active'] != -1: - active[obs['active']] = 1 - o.extend(active) - - game_mode = [0] * 7 - game_mode[obs['game_mode']] = 1 - o.extend(game_mode) - - # sticky actions - o.extend(obs['sticky_actions']) - - # subjective pose - if obs['active'] != -1: - o.extend(obs['left_team'][obs['active']]) - o.extend(obs['left_team_direction'][obs['active']]) - o.extend(obs['ball'][:2] - obs['left_team'][obs['active']]) - o.extend(obs['ball_direction'][:2] - obs['left_team_direction'][obs['active']]) - else: - o.extend([-1] * 8) - - final_obs.append(o) + o = [] + if fixed_positions: + for i, name in enumerate(['left_team', 'left_team_direction', + 'right_team', 'right_team_direction']): + o.extend(do_flatten(obs[name])) + # If there were less than 11vs11 players we backfill missing values + # with -1. + if len(o) < (i + 1) * 22: + o.extend([-1] * ((i + 1) * 22 - len(o))) + else: + o.extend(do_flatten(obs['left_team'])) + o.extend(do_flatten(obs['left_team_direction'])) + o.extend(do_flatten(obs['right_team'])) + o.extend(do_flatten(obs['right_team_direction'])) + + # If there were less than 11vs11 players we backfill missing values with + # -1. 
+ # 88 = 11 (players) * 2 (teams) * 2 (positions & directions) * 2 (x & y) + if len(o) < 88: + o.extend([-1] * (88 - len(o))) + + # ball position + o.extend(obs['ball']) + # ball direction + o.extend(obs['ball_direction']) + # one hot encoding of which team owns the ball + if obs['ball_owned_team'] == -1: + o.extend([1, 0, 0]) + if obs['ball_owned_team'] == 0: + o.extend([0, 1, 0]) + if obs['ball_owned_team'] == 1: + o.extend([0, 0, 1]) + + active = [0] * 11 + if obs['active'] != -1: + active[obs['active']] = 1 + o.extend(active) + + game_mode = [0] * 7 + game_mode[obs['game_mode']] = 1 + o.extend(game_mode) + + # sticky actions + o.extend(obs['sticky_actions']) + + # subjective pose + if obs['active'] != -1: + o.extend(obs['left_team'][obs['active']]) + o.extend(obs['left_team_direction'][obs['active']]) + o.extend(obs['ball'][:2] - obs['left_team'][obs['active']]) + o.extend(obs['ball_direction'][:2] - obs['left_team_direction'][obs['active']]) + else: + o.extend([-1] * 8) - return np.array(final_obs, dtype=np.float32) + return np.array(o, dtype=np.float32) # feature @@ -784,7 +780,7 @@ def observation(self, player, number=0): info = {'half_step': self.half_step} index = player * self.CONTROLLED_PLAYERS + number #return feature_from_states(self.states, info, ) - return convert_observation_115_plus_alpha(self.states[-1]['observation'], True)[index] + return convert_observation_115_plus_alpha(self.states[-1]['observation'][index], index, True) def _preprocess_state(self, state): if state is None: From 73e5405285eb241ad455fc7bf46da97cc551e0c9 Mon Sep 17 00:00:00 2001 From: YuriCat Date: Fri, 20 May 2022 01:22:26 +0900 Subject: [PATCH 22/22] feature: update football environment --- handyrl/envs/gfootball.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py index 29ecab99..4ec71176 100644 --- a/handyrl/envs/gfootball.py +++ b/handyrl/envs/gfootball.py @@ -108,7 +108,7 @@ def forward(self, x, hidden): class FootballRecurrentNet(nn.Module): def __init__(self): super().__init__() - units = 192 + units = 256 self.units = units self.fc1 = nn.Linear(133, units) @@ -740,6 +740,9 @@ def terminal(self): or len(self.states) > self.limit_step \ or (self.FINISH_BY_GOAL and sum(self.score().values()) > 0) + def __str__(self): + return 'step ' + str(len(self.states)) + ' ' + str(list(self.score().values())) + def view_transition(self): print(self.states[-1]['action'])
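
Note on the feature size (an inference from the diffs above, not a statement by the author): FootballRecurrentNet keeps nn.Linear(133, units) as its input layer, and 133 is exactly the width of the "115 plus alpha" vector that convert_observation_115_plus_alpha() builds: the standard simple115 block (88 position/direction values, ball position and direction, ball-ownership one-hot, active-player one-hot, game-mode one-hot) plus the 10 sticky-action flags and the 8 "subjective pose" values added in this series. A minimal Python sketch of that accounting; the layout names are hypothetical, only the sizes come from the code:

    # Hypothetical slot names; sizes are read off the feature-building code above.
    SIMPLE115_LAYOUT = [
        ('positions_and_directions', 88),  # 11 players * 2 teams * 2 (pos, dir) * 2 (x, y)
        ('ball_position', 3),              # x, y, z
        ('ball_direction', 3),
        ('ball_ownership_one_hot', 3),     # none / left team / right team
        ('active_player_one_hot', 11),
        ('game_mode_one_hot', 7),
    ]
    EXTRA_LAYOUT = [
        ('sticky_actions', 10),
        ('subjective_pose', 8),            # active pos (2) + dir (2) + ball offset (2) + ball-dir offset (2)
    ]

    assert sum(n for _, n in SIMPLE115_LAYOUT) == 115
    # 115 + 10 + 8 = 133, matching nn.Linear(133, units) in FootballRecurrentNet.
    assert sum(n for _, n in SIMPLE115_LAYOUT + EXTRA_LAYOUT) == 133

Separately, with patch 19 applied, eval_main() splits argv[0] on ':', so a head-to-head evaluation of two checkpoints should look roughly like the following (assuming HandyRL's usual --eval entry point, here over 100 games with 4 processes; a single path keeps the old behavior of playing against random agents):

    python main.py --eval models/latest.pth:models/older.pth 100 4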