diff --git a/handyrl/envs/gfootball.py b/handyrl/envs/gfootball.py
new file mode 100644
index 00000000..4ec71176
--- /dev/null
+++ b/handyrl/envs/gfootball.py
@@ -0,0 +1,846 @@
+import random
+import copy
+
+import numpy as np
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from handyrl.environment import BaseEnvironment
+
+
+class FootballNet(nn.Module):
+    class FootballHead(nn.Module):
+        def __init__(self, units0, units1):
+            super().__init__()
+            self.fc = nn.Linear(units0, units1)
+            self.bn = nn.BatchNorm1d(units1)
+            self.head_p = nn.Linear(units1, 19, bias=False)
+            self.head_v = nn.Linear(units1, 1, bias=False)
+            self.head_r = nn.Linear(units1, 1, bias=False)
+
+        def forward(self, x):
+            h = F.relu_(self.bn(self.fc(x)))
+            p = self.head_p(h)
+            v = torch.tanh(self.head_v(h))
+            r = torch.tanh(self.head_r(h))
+            return {'policy': p, 'value': v, 'return': r}
+
+    class CNNModel(nn.Module):
+        def __init__(self, final_filters):
+            super().__init__()
+            self.conv1 = nn.Sequential(
+                nn.Conv2d(53, 128, kernel_size=1, stride=1, bias=False),
+                nn.ReLU(inplace=True),
+                nn.Conv2d(128, 160, kernel_size=1, stride=1, bias=False),
+                nn.ReLU(inplace=True),
+                nn.Conv2d(160, 128, kernel_size=1, stride=1, bias=False),
+                nn.ReLU(inplace=True)
+            )
+            self.pool1 = nn.AdaptiveAvgPool2d((1, 11))
+            self.conv2 = nn.Sequential(
+                nn.BatchNorm2d(128),
+                nn.Conv2d(128, 160, kernel_size=(1, 1), stride=1, bias=False),
+                nn.ReLU(inplace=True),
+                nn.BatchNorm2d(160),
+                nn.Conv2d(160, 96, kernel_size=(1, 1), stride=1, bias=False),
+                nn.ReLU(inplace=True),
+                nn.BatchNorm2d(96),
+                nn.Conv2d(96, final_filters, kernel_size=(1, 1), stride=1, bias=False),
+                nn.ReLU(inplace=True),
+                nn.BatchNorm2d(final_filters),
+            )
+            self.pool2 = nn.AdaptiveAvgPool2d((1, 1))
+            self.flatten = nn.Flatten()
+
+        def forward(self, x):
+            x = x['cnn_feature']
+            x = torch.cat([
+                x['2d'],
+                x['left'].unsqueeze(-1).repeat(1, 1, 1, 11),
+                x['right'].unsqueeze(-2).repeat(1, 1, 11, 1),
+                x['scalar'].unsqueeze(-1).unsqueeze(-1).repeat(1, 1, 11, 11),
+            ], 1)
+            x = self.conv1(x)
+            x = self.pool1(x)
+            x = self.conv2(x)
+            x = self.pool2(x)
+            x = self.flatten(x)
+            return x
+
+    class ActionHistoryEncoder(nn.Module):
+        def __init__(self, hidden_size=64, num_layers=2):
+            super().__init__()
+            self.action_emd = nn.Embedding(19, 8)
+            self.rnn = nn.GRU(8, hidden_size, num_layers, batch_first=True)
+
+        def forward(self, x):
+            h = self.action_emd(x['action_history'])
+            h = h.squeeze(dim=2)
+            self.rnn.flatten_parameters()
+            h, _ = self.rnn(h)
+            return h
+
+    def __init__(self):
+        super().__init__()
+
+        self.cnn = self.CNNModel(64)  # to control
+        self.rnn = self.ActionHistoryEncoder(64, 2)
+        self.head = self.FootballHead(157, 64)
+
+    def forward(self, x, hidden):
+        cnn_h = self.cnn(x)
+        rnn_h = self.rnn(x)
+
+        h = torch.cat([
+            cnn_h.view(cnn_h.size(0), -1),
+            rnn_h[:, -1, :],
+            x['ball'],
+            x['match'],
+            x['control']], -1)
+        o = self.head(h)
+
+        return o
+
+
+class FootballRecurrentNet(nn.Module):
+    def __init__(self):
+        super().__init__()
+        units = 256
+
+        self.units = units
+        self.fc1 = nn.Linear(133, units)
+        self.fc2 = nn.Linear(units, units)
+        self.rnn_blocks = nn.ModuleList([nn.LSTMCell(units, units) for _ in range(4)])
+        self.fc3 = nn.Linear(units, units)
+        self.fcp = nn.Linear(units, 19, bias=False)
+        self.fcv = nn.Linear(units, 1, bias=False)
+        self.fcr = nn.Linear(units, 1, bias=False)
+
+    def init_hidden(self, batch_size):
+        return [(torch.zeros(*batch_size, self.units),
+                 torch.zeros(*batch_size, self.units)) for _ in self.rnn_blocks]
+
+    def forward(self, x, hidden):
+        h = x
+        h = F.relu_(self.fc1(h))
+        h = F.relu_(self.fc2(h))
+        next_hidden = []
+        for block, hidden_ in zip(self.rnn_blocks, hidden):
+            h, c_ = block(h, hidden_)
+            next_hidden.append((h, c_))
+
+        h = F.relu_(self.fc3(h))
+        p = self.fcp(h)
+        v = torch.tanh(self.fcv(h))
+        r = torch.tanh(self.fcr(h))
+
+        return {'policy': p, 'value': v, 'return': r, 'hidden': next_hidden}
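+
+
+# Editorial sketch (not part of the original patch): FootballRecurrentNet
+# consumes the flat 133-dim vector built by convert_observation_115_plus_alpha()
+# below, and 'hidden' is the list of four (h, c) LSTMCell state pairs from
+# init_hidden(). A minimal shape check, assuming a batch of 8 observations:
+#
+#     net = FootballRecurrentNet()
+#     out = net.forward(torch.zeros(8, 133), net.init_hidden((8,)))
+#     assert out['policy'].shape == (8, 19)
+#     assert out['value'].shape == (8, 1) and out['return'].shape == (8, 1)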
+
+
+# https://github.com/google-research/football/blob/12f93de031e7f7c105f32924d113b1f7e6d77349/gfootball/env/wrappers.py
+
+def convert_observation_115_plus_alpha(obs, num, fixed_positions):
+    """Converts an observation into the simple115 format plus extra features.
+
+    Args:
+      obs: raw observation dict that the environment returns.
+      num: index of the controlled player (currently unused).
+      fixed_positions: Players and positions always occupy 88 fields
+        (even if the game is played 1v1).
+        If True, the positions of the players will be the same no matter
+        how many players are on the field (so the first 11 pairs belong to
+        the first team, even if it has fewer players).
+        If False, the positions of players from team2 will depend on the
+        number of players in team1.
+
+    Returns:
+      A 133-dim float32 vector: the 115-dim simple115 representation of the
+      controlled player, extended with sticky actions (10) and a subjective
+      pose block (8).
+    """
+
+    def do_flatten(obj):
+        """Run flatten on either python list or numpy array."""
+        if type(obj) == list:
+            return np.array(obj).flatten()
+        return obj.flatten()
+
+    o = []
+    if fixed_positions:
+        for i, name in enumerate(['left_team', 'left_team_direction',
+                                  'right_team', 'right_team_direction']):
+            o.extend(do_flatten(obs[name]))
+            # If there were less than 11vs11 players we backfill missing values
+            # with -1.
+            if len(o) < (i + 1) * 22:
+                o.extend([-1] * ((i + 1) * 22 - len(o)))
+    else:
+        o.extend(do_flatten(obs['left_team']))
+        o.extend(do_flatten(obs['left_team_direction']))
+        o.extend(do_flatten(obs['right_team']))
+        o.extend(do_flatten(obs['right_team_direction']))
+
+    # If there were less than 11vs11 players we backfill missing values with
+    # -1.
+    # 88 = 11 (players) * 2 (teams) * 2 (positions & directions) * 2 (x & y)
+    if len(o) < 88:
+        o.extend([-1] * (88 - len(o)))
+
+    # ball position
+    o.extend(obs['ball'])
+    # ball direction
+    o.extend(obs['ball_direction'])
+    # one hot encoding of which team owns the ball
+    if obs['ball_owned_team'] == -1:
+        o.extend([1, 0, 0])
+    if obs['ball_owned_team'] == 0:
+        o.extend([0, 1, 0])
+    if obs['ball_owned_team'] == 1:
+        o.extend([0, 0, 1])
+
+    active = [0] * 11
+    if obs['active'] != -1:
+        active[obs['active']] = 1
+    o.extend(active)
+
+    game_mode = [0] * 7
+    game_mode[obs['game_mode']] = 1
+    o.extend(game_mode)
+
+    # sticky actions
+    o.extend(obs['sticky_actions'])
+
+    # subjective pose
+    if obs['active'] != -1:
+        o.extend(obs['left_team'][obs['active']])
+        o.extend(obs['left_team_direction'][obs['active']])
+        o.extend(obs['ball'][:2] - obs['left_team'][obs['active']])
+        o.extend(obs['ball_direction'][:2] - obs['left_team_direction'][obs['active']])
+    else:
+        o.extend([-1] * 8)
+
+    return np.array(o, dtype=np.float32)
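+
+
+# Editorial note: the vector above decomposes as 88 (positions/directions)
+# + 3 (ball position) + 3 (ball direction) + 3 (ball-ownership one-hot)
+# + 11 (active-player one-hot) + 7 (game-mode one-hot) + 10 (sticky actions)
+# + 8 (subjective pose) = 133 floats, matching nn.Linear(133, units) in
+# FootballRecurrentNet.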
+
+
+# feature
+
+def feature_from_states(states, info, number):
+    # observation list to input tensor
+
+    HISTORY_LENGTH = 20
+
+    obs_history_ = [s['observation'][number] for s in states[-HISTORY_LENGTH:]]
+    obs_history = [obs_history_[0]] * (HISTORY_LENGTH - len(obs_history_)) + obs_history_
+    obs = obs_history[-1]
+
+    action_history_ = [s['action'][number] for s in states[-HISTORY_LENGTH:]]
+    action_history = [0] * (HISTORY_LENGTH - len(action_history_)) + action_history_
+
+    """
+    ・left players (x)
+    ・left players (y)
+    ・right players (x)
+    ・right players (y)
+    ・ball (x)
+    ・ball (y)
+    ・left goal (x)
+    ・left goal (y)
+    ・right goal (x)
+    ・right goal (y)
+    ・active (x)
+    ・active (y)
+
+    ・left players (x) - right players (x)
+    ・left players (y) - right players (y)
+    ・left players (x) - ball (x)
+    ・left players (y) - ball (y)
+    ・left players (x) - goal (x)
+    ・left players (y) - goal (y)
+    ・left players (x) - active (x)
+    ・left players (y) - active (y)
+
+    ・left players direction (x)
+    ・left players direction (y)
+    ・right players direction (x)
+    ・right players direction (y)
+    ・left players direction (x) - right players direction (x)
+    ・left players direction (y) - right players direction (y)
+    """
+
+    # left players
+    obs_left_team = np.array(obs['left_team'])
+    left_player_x = obs_left_team[:, 0]
+    left_player_y = obs_left_team[:, 1]
+
+    # right players
+    obs_right_team = np.array(obs['right_team'])
+    right_player_x = obs_right_team[:, 0]
+    right_player_y = obs_right_team[:, 1]
+
+    # ball
+    obs_ball = np.array(obs['ball'])
+    ball_x = obs_ball[0]
+    ball_y = obs_ball[1]
+    ball_z = obs_ball[2]
+
+    # goal
+    left_goal, right_goal = [-1, 0], [1, 0]
+    left_goal_x = left_goal[0]
+    left_goal_y = left_goal[1]
+    right_goal_x = right_goal[0]
+    right_goal_y = right_goal[1]
+
+    # side line
+    side_line_y = [-.42, .42]
+    side_line_y_top = side_line_y[0]
+    side_line_y_bottom = side_line_y[1]
+
+    # active
+    active = np.array(obs['active'])
+    active_player_x = obs_left_team[active][0]
+    active_player_y = obs_left_team[active][1]
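+
+    # Editorial note: in the difference features below, [..., None] broadcasts
+    # an (11,) vector against an (11,) vector to give an 11 x 11 matrix
+    # (left player i vs right player j); these become the '2d' planes of the
+    # CNN input.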
+
+    # left players - right players
+    left_minus_right_player_x = obs_left_team[:, 0][..., None] - obs_right_team[:, 0]
+    left_minus_right_player_y = obs_left_team[:, 1][..., None] - obs_right_team[:, 1]
+
+    # left players - ball
+    left_minus_ball_x = obs_left_team[:, 0] - obs_ball[0]
+    left_minus_ball_y = obs_left_team[:, 1] - obs_ball[1]
+
+    # left players - right goal
+    left_minus_right_goal_x = obs_left_team[:, 0] - right_goal[0]
+    left_minus_right_goal_y = obs_left_team[:, 1] - right_goal[1]
+
+    # left players - left goal
+    left_minus_left_goal_x = obs_left_team[:, 0] - left_goal[0]
+    left_minus_left_goal_y = obs_left_team[:, 1] - left_goal[1]
+
+    # right players - right goal
+    right_minus_right_goal_x = obs_right_team[:, 0] - right_goal[0]
+    right_minus_right_goal_y = obs_right_team[:, 1] - right_goal[1]
+
+    # right players - left goal
+    right_minus_left_goal_x = obs_right_team[:, 0] - left_goal[0]
+    right_minus_left_goal_y = obs_right_team[:, 1] - left_goal[1]
+
+    # left players - active
+    left_minus_active_x = obs_left_team[:, 0] - obs_left_team[active][0]
+    left_minus_active_y = obs_left_team[:, 1] - obs_left_team[active][1]
+
+    # right players - ball
+    right_minus_ball_x = obs_right_team[:, 0] - obs_ball[0]
+    right_minus_ball_y = obs_right_team[:, 1] - obs_ball[1]
+
+    # right players - active
+    right_minus_active_x = obs_right_team[:, 0] - obs_left_team[active][0]
+    right_minus_active_y = obs_right_team[:, 1] - obs_left_team[active][1]
+
+    # left players - side line
+    left_minus_side_top = np.abs(obs_left_team[:, 1] - side_line_y[0])
+    left_minus_side_bottom = np.abs(obs_left_team[:, 1] - side_line_y[1])
+
+    # right players - side line
+    right_minus_side_top = np.abs(obs_right_team[:, 1] - side_line_y[0])
+    right_minus_side_bottom = np.abs(obs_right_team[:, 1] - side_line_y[1])
+
+    # left players direction
+    obs_left_team_direction = np.array(obs['left_team_direction'])
+    left_player_direction_x = obs_left_team_direction[:, 0]
+    left_player_direction_y = obs_left_team_direction[:, 1]
+
+    # right players direction
+    obs_right_team_direction = np.array(obs['right_team_direction'])
+    right_player_direction_x = obs_right_team_direction[:, 0]
+    right_player_direction_y = obs_right_team_direction[:, 1]
+
+    # ball direction
+    obs_ball_direction = np.array(obs['ball_direction'])
+    ball_direction_x = obs_ball_direction[0]
+    ball_direction_y = obs_ball_direction[1]
+    ball_direction_z = obs_ball_direction[2]
+
+    # left players direction - right players direction
+    left_minus_right_player_direction_x = obs_left_team_direction[:, 0][..., None] - obs_right_team_direction[:, 0]
+    left_minus_right_player_direction_y = obs_left_team_direction[:, 1][..., None] - obs_right_team_direction[:, 1]
+
+    # left players direction - ball direction
+    left_minus_ball_direction_x = obs_left_team_direction[:, 0] - obs_ball_direction[0]
+    left_minus_ball_direction_y = obs_left_team_direction[:, 1] - obs_ball_direction[1]
+
+    # right players direction - ball direction
+    right_minus_ball_direction_x = obs_right_team_direction[:, 0] - obs_ball_direction[0]
+    right_minus_ball_direction_y = obs_right_team_direction[:, 1] - obs_ball_direction[1]
+
+    # ball rotation
+    obs_ball_rotation = np.array(obs['ball_rotation'])
+    ball_rotation_x = obs_ball_rotation[0]
+    ball_rotation_y = obs_ball_rotation[1]
+    ball_rotation_z = obs_ball_rotation[2]
+
+    cnn_scalar = np.stack([
+        active_player_x,
+        active_player_y,
+        ball_x,
+        ball_y,
+        ball_z,
+        left_goal_x,
+        left_goal_y,
+        right_goal_x,
+        right_goal_y,
+        side_line_y_top,
+        side_line_y_bottom,
+        ball_direction_x,
+        ball_direction_y,
+        ball_direction_z,
+        ball_rotation_x,
+        ball_rotation_y,
+        ball_rotation_z,
+    ]).astype(np.float32)
+
+    cnn_left = np.stack([
+        left_player_x,
+        left_player_y,
+        left_minus_active_x,
+        left_minus_active_y,
+        left_minus_right_goal_x,
+        left_minus_right_goal_y,
+        left_minus_left_goal_x,
+        left_minus_left_goal_y,
+        left_minus_side_top,
+        left_minus_side_bottom,
+        left_minus_ball_x,
+        left_minus_ball_y,
+        left_minus_ball_direction_x,
+        left_minus_ball_direction_y,
+        left_player_direction_x,
+        left_player_direction_y,
+    ]).astype(np.float32)
+
+    cnn_right = np.stack([
+        right_player_x,
+        right_player_y,
+        right_minus_active_x,
+        right_minus_active_y,
+        right_minus_right_goal_x,
+        right_minus_right_goal_y,
+        right_minus_left_goal_x,
+        right_minus_left_goal_y,
+        right_minus_side_top,
+        right_minus_side_bottom,
+        right_minus_ball_x,
+        right_minus_ball_y,
+        right_minus_ball_direction_x,
+        right_minus_ball_direction_y,
+        right_player_direction_x,
+        right_player_direction_y,
+    ]).astype(np.float32)
+
+    cnn_2d = np.stack([
+        left_minus_right_player_x,
+        left_minus_right_player_y,
+        left_minus_right_player_direction_x,
+        left_minus_right_player_direction_y,
+    ]).astype(np.float32)
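+
+    # Editorial note: in FootballNet.CNNModel.forward these planes are
+    # broadcast over the 11 x 11 player grid and concatenated to
+    # 4 ('2d') + 16 ('left') + 16 ('right') + 17 ('scalar') = 53 input
+    # channels, matching nn.Conv2d(53, 128, ...) above.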
+
+    # ball
+    BALL_OWNED_1HOT = {-1: [0, 0], 0: [1, 0], 1: [0, 1]}
+    ball_owned_team_ = obs['ball_owned_team']
+    ball_owned_team = BALL_OWNED_1HOT[ball_owned_team_]  # {-1, 0, 1} None, self, opponent
+    PLAYER_1HOT = np.concatenate([np.eye(11), np.zeros((1, 11))])
+    ball_owned_player_ = PLAYER_1HOT[obs['ball_owned_player']]  # {-1, N-1}
+    if ball_owned_team_ == -1:
+        my_ball_owned_player = PLAYER_1HOT[-1]
+        op_ball_owned_player = PLAYER_1HOT[-1]
+    elif ball_owned_team_ == 0:
+        my_ball_owned_player = ball_owned_player_
+        op_ball_owned_player = PLAYER_1HOT[-1]
+    else:
+        my_ball_owned_player = PLAYER_1HOT[-1]
+        op_ball_owned_player = ball_owned_player_
+
+    ball_features = np.concatenate([
+        obs['ball'],
+        obs['ball_direction'],
+        obs['ball_rotation']
+    ]).astype(np.float32)
+
+    # self team
+    left_team_features = np.concatenate([
+        [[1] for _ in obs['left_team']],  # left team flag
+        obs['left_team'],  # position
+        obs['left_team_direction'],
+        [[v] for v in obs['left_team_tired_factor']],
+        [[v] for v in obs['left_team_yellow_card']],
+        [[v] for v in obs['left_team_active']],
+        my_ball_owned_player[..., np.newaxis]
+    ], axis=1).astype(np.float32)
+
+    left_team_indice = np.arange(0, 11, dtype=np.int32)
+
+    # opponent team
+    right_team_features = np.concatenate([
+        [[0] for _ in obs['right_team']],  # right team flag
+        obs['right_team'],  # position
+        obs['right_team_direction'],
+        [[v] for v in obs['right_team_tired_factor']],
+        [[v] for v in obs['right_team_yellow_card']],
+        [[v] for v in obs['right_team_active']],
+        op_ball_owned_player[..., np.newaxis]
+    ], axis=1).astype(np.float32)
+
+    right_team_indice = np.arange(0, 11, dtype=np.int32)
+
+    # distance information
+    def get_distance(xy1, xy2):
+        return (((xy1 - xy2) ** 2).sum(axis=-1)) ** 0.5
+
+    def get_line_distance(x1, x2):
+        return np.abs(x1 - x2)
+
+    def multi_scale(x, scale):
+        return 2 / (1 + np.exp(-np.array(x)[..., np.newaxis] / np.array(scale)))
+
+    # concatenate the teams explicitly (raw observations store them as numpy
+    # arrays, where '+' would be an elementwise sum rather than concatenation)
+    both_team = np.concatenate([obs['left_team'], obs['right_team']], axis=0).astype(np.float32)
+    ball = np.array([obs['ball'][:2]], dtype=np.float32)
+    goal = np.array([[-1, 0], [1, 0]], dtype=np.float32)
+    goal_line_x = np.array([-1, 1], dtype=np.float32)
+    side_line_y = np.array([-.42, .42], dtype=np.float32)
+
+    # ball <-> goal, goal line, side line distance
+    b2g_distance = get_distance(ball, goal)
+    b2gl_distance = get_line_distance(ball[0][0], goal_line_x)
+    b2sl_distance = get_line_distance(ball[0][1], side_line_y)
+    b2o_distance = np.concatenate([
+        b2g_distance, b2gl_distance, b2sl_distance
+    ], axis=-1)
+
+    # player <-> ball, goal, back line, side line distance
+    p2b_distance = get_distance(both_team[:, np.newaxis, :], ball[np.newaxis, :, :])
+    p2g_distance = get_distance(both_team[:, np.newaxis, :], goal[np.newaxis, :, :])
+    p2gl_distance = get_line_distance(both_team[:, :1], goal_line_x[np.newaxis, :])
+    p2sl_distance = get_line_distance(both_team[:, 1:], side_line_y[np.newaxis, :])
+    p2bo_distance = np.concatenate([
+        p2b_distance, p2g_distance, p2gl_distance, p2sl_distance
+    ], axis=-1)
+
+    # player <-> player distance
+    p2p_distance = get_distance(both_team[:, np.newaxis, :], both_team[np.newaxis, :, :])
+
+    # controlled player information
+    control_flag_ = np.array(PLAYER_1HOT[obs['active']], dtype=np.float32)
+    control_flag = np.concatenate([control_flag_, np.zeros(len(obs['right_team']), dtype=np.float32)])[..., np.newaxis]
+
+    # controlled status information
+    DIR = [
+        [-1, 0], [-.707, -.707], [0, 1], [ .707, -.707],  # L, TL, T, TR
+        [ 1, 0], [ .707,  .707], [0, -1], [-.707,  .707]  # R, BR, B, BL
+    ]
+
+    sticky_direction = DIR[np.where(obs['sticky_actions'][:8] == 1)[0][0]] if 1 in obs['sticky_actions'][:8] else [0, 0]
+    sticky_flags = obs['sticky_actions'][8:]
+
+    control_features = np.concatenate([
+        sticky_direction,
+        sticky_flags,
+    ]).astype(np.float32)
+
+    # match state
+    if obs['steps_left'] > info['half_step']:
+        steps_left_half = obs['steps_left'] - info['half_step']
+    else:
+        steps_left_half = obs['steps_left']
+    match_features = np.concatenate([
+        multi_scale(obs['score'], [1, 3]).ravel(),
+        multi_scale(obs['score'][0] - obs['score'][1], [1, 3]),
+        multi_scale(obs['steps_left'], [10, 100, 1000, 10000]),
+        multi_scale(steps_left_half, [10, 100, 1000, 10000]),
+        ball_owned_team,
+    ]).astype(np.float32)
+
+    mode_index = np.array([obs['game_mode']], dtype=np.int32)
+
+    action_history = np.array(action_history, dtype=np.int32)[..., None]
+
+    return {
+        # features
+        'ball': ball_features,
+        'match': match_features,
+        #'player': {
+        #    'self': left_team_features,
+        #    'opp': right_team_features
+        #},
+        'control': control_features,
+        #'player_index': {
+        #    'self': left_team_indice,
+        #    'opp': right_team_indice
+        #},
+        'mode_index': mode_index,
+        'control_flag': control_flag,
+        # distances
+        #'distance': {
+        #    'p2p': p2p_distance,
+        #    'p2bo': p2bo_distance,
+        #    'b2o': b2o_distance
+        #},
+        # CNN
+        'cnn_feature': {
+            'scalar': cnn_scalar,
+            'left': cnn_left,
+            'right': cnn_right,
+            '2d': cnn_2d
+        },
+        'action_history': action_history
+    }
+
+
+# https://github.com/Kaggle/kaggle-environments/blob/master/kaggle_environments/envs/football/helpers.py
+
+import enum
+
+class Action(enum.IntEnum):
+    Idle = 0
+    Left = 1
+    TopLeft = 2
+    Top = 3
+    TopRight = 4
+    Right = 5
+    BottomRight = 6
+    Bottom = 7
+    BottomLeft = 8
+    LongPass = 9
+    HighPass = 10
+    ShortPass = 11
+    Shot = 12
+    Sprint = 13
+    ReleaseDirection = 14
+    ReleaseSprint = 15
+    Slide = 16
+    Dribble = 17
+    ReleaseDribble = 18
+
+sticky_index_to_action = [
+    Action.Left,
+    Action.TopLeft,
+    Action.Top,
+    Action.TopRight,
+    Action.Right,
+    Action.BottomRight,
+    Action.Bottom,
+    Action.BottomLeft,
+    Action.Sprint,
+    Action.Dribble
+]
+
+action_to_sticky_index = {
+    a: index for index, a in enumerate(sticky_index_to_action)
+}
+
+class PlayerRole(enum.IntEnum):
+    GoalKeeper = 0
+    CenterBack = 1
+    LeftBack = 2
+    RightBack = 3
+    DefenceMidfield = 4
+    CentralMidfield = 5
+    LeftMidfield = 6
+    RightMidfield = 7
+    AttackMidfield = 8
+    CentralFront = 9
+
+
+class GameMode(enum.IntEnum):
+    Normal = 0
+    KickOff = 1
+    GoalKick = 2
+    FreeKick = 3
+    Corner = 4
+    ThrowIn = 5
+    Penalty = 6
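+
+
+# Editorial note: this wrapper controls one player per team
+# (CONTROLLED_PLAYERS = 1) and treats the match as a two-player zero-sum game:
+# player 0 is the left team and player 1 the right team in observation(),
+# reward() and outcome().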
+
+
+class Environment(BaseEnvironment):
+    ACTION_LEN = 19
+    CONTROLLED_PLAYERS = 1
+    FINISH_BY_GOAL = True
+
+    def __init__(self, args=None):
+        self.env = None
+        args = args if args is not None else {}
+        self.limit_step = args.get('limit_step', 1000)
+
+    def reset(self, args=None):
+        if args is None:
+            args = {}
+        show = args.get('show', False)
+
+        if self.env is None:
+            from gfootball.env import create_environment
+
+            self.env = create_environment(
+                env_name="11_vs_11_stochastic",
+                representation='raw',
+                write_full_episode_dumps=show,
+                logdir='videos',
+                write_video=show,
+                number_of_left_players_agent_controls=self.CONTROLLED_PLAYERS,
+                number_of_right_players_agent_controls=self.CONTROLLED_PLAYERS,
+                other_config_options={
+                    'action_set': 'v2',
+                    'video_format': 'webm',
+                })
+
+        if show:
+            self.env.render()
+        obs = self.env.reset()
+        self.update({'observation': obs, 'action': [0] * self.CONTROLLED_PLAYERS * 2}, reset=True)
+
+    def update(self, state, reset):
+        if reset:
+            self.done = False
+            self.prev_score = {p: 0 for p in self.players()}
+            self.states = []
+            self.half_step = 1500
+            self.reserved_action = [None, None]
+        else:
+            self.prev_score = self.score()
+
+        state = copy.deepcopy(state)
+        state = self._preprocess_state(state)
+        self.states.append(state)
+
+        if reset:
+            self.half_step = state['observation'][0]['steps_left'] // 2
+
+    def step(self, actions):
+        # state transition function
+        # action is an integer (0 ~ 19 with the 'v2' action set)
+        actions = copy.deepcopy(actions)
+        for i, res_action in enumerate(self.reserved_action):
+            if res_action is not None:
+                actions[i] = res_action
+
+        # step environment
+        flat_actions = [actions[0], actions[1]]
+        obs, _, self.done, _ = self.env.step(flat_actions)
+        self.update({'observation': obs, 'action': flat_actions}, reset=False)
+
+    def diff_info(self):
+        return self.states[-1]
+
+    def turns(self):
+        return self.players()
+
+    def players(self):
+        return [0, 1]
+
+    def terminal(self):
+        # check whether the state is terminal
+        return self.done \
+            or len(self.states) > self.limit_step \
+            or (self.FINISH_BY_GOAL and sum(self.score().values()) > 0)
+
+    def __str__(self):
+        return 'step ' + str(len(self.states)) + ' ' + str(list(self.score().values()))
+
+    def view_transition(self):
+        print(self.states[-1]['action'])
+
+    def score(self):
+        if len(self.states) == 0:
+            return {p: 0 for p in self.players()}
+        state = self.states[-1]
+        return {p: state['observation'][0]['score'][p] for p in self.players()}
+
+    def reward(self):
+        prev_score = self.prev_score
+        score = self.score()
+
+        rewards = {}
+        for p in self.players():
+            r = 1.0 * (score[p] - prev_score[p]) - 1.0 * (score[1 - p] - prev_score[1 - p])
+            rewards[p] = r
+
+        return rewards
+
+    def outcome(self):
+        scores = self.score()
+        if scores[0] > scores[1]:
+            return {0: 1, 1: -1}
+        elif scores[0] < scores[1]:
+            return {0: -1, 1: 1}
+        return {0: 0, 1: 0}
+
+    def legal_actions(self, player, number=0):
+        # legal action list
+        return [e for e in Action]
+
+    def raw_observation(self, player):
+        return self.states[-1]['observation'][player]
+
+    def observation(self, player, number=0):
+        # input feature for neural nets
+        info = {'half_step': self.half_step}
+        index = player * self.CONTROLLED_PLAYERS + number
+        #return feature_from_states(self.states, info, )
+        return convert_observation_115_plus_alpha(self.states[-1]['observation'][index], index, True)
+
+    def _preprocess_state(self, state):
+        if state is None:
+            return state
+
+        # in ball-dead state, set ball owned player and team
+        for o in state['observation']:
+            mode = o['game_mode']
+            if mode == GameMode.FreeKick or \
+               mode == GameMode.Corner or \
+               mode == GameMode.Penalty or \
+               mode == GameMode.GoalKick:
+                # find nearest player and team
+                def dist(xy1, xy2):
+                    return ((xy1[0] - xy2[0]) ** 2 + (xy1[1] - xy2[1]) ** 2) ** 0.5
+                team_player_position = [(0, i, p) for i, p in enumerate(o['left_team'])] + \
+                                       [(1, i, p) for i, p in enumerate(o['right_team'])]
+                distances = [(t[0], t[1], dist(t[2], o['ball'][:2])) for t in team_player_position]
+                distances = sorted(distances, key=lambda x: x[2])
+
+                o['ball_owned_team'] = distances[0][0]
+                o['ball_owned_player'] = distances[0][1]
+
+        return state
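+
+    # Editorial note: action 19 used below is the extra 'builtin_ai' action
+    # available because create_environment() is called with action_set='v2';
+    # 14 and 5 correspond to Action.ReleaseDirection and Action.Right above.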
+
+    def rule_based_action(self, player=None, number=0, key=None):
+        if key is None:
+            key = 'builtin_ai'
+
+        if key == 'builtin_ai':
+            return 19
+        elif key == 'idle':
+            return 14
+        elif key == 'right':
+            return 5
+
+    def net(self):
+        #return FootballNet()
+        return FootballRecurrentNet()
+
+
+if __name__ == '__main__':
+    e = Environment()
+    for _ in range(1):
+        e.reset()
+        o = e.observation(0)
+        while not e.terminal():
+            # print(e)
+            _ = e.observation(0)
+            _ = e.observation(1)
+            #print(e.raw_observation(0)[0]['steps_left'])
+            action_list = [0, 0]
+            action_list[0] = random.choice(e.legal_actions(0))
+            action_list[1] = 19
+            print(len(e.states), action_list)
+            e.step(action_list)
+            print(e.reward())
+        if sum(e.score().values()) > 0:
+            print('goal!')
+        print(e.outcome())
diff --git a/handyrl/evaluation.py b/handyrl/evaluation.py
index 248f5b6c..1a425c63 100755
--- a/handyrl/evaluation.py
+++ b/handyrl/evaluation.py
@@ -81,6 +81,7 @@ def observe(self, player):
 
 def exec_match(env, agents, critic=None, show=False, game_args={}):
     ''' match with shared game environment '''
+    game_args['show'] = show
     if env.reset(game_args):
         return None
     for agent in agents.values():
@@ -110,6 +111,7 @@
 
 def exec_network_match(env, network_agents, critic=None, show=False, game_args={}):
     ''' match with divided game environment '''
+    game_args['show'] = show
     if env.reset(game_args):
         return None
     for p, agent in network_agents.items():
@@ -378,14 +380,18 @@ def eval_main(args, argv):
     prepare_env(env_args)
     env = make_env(env_args)
 
-    model_path = argv[0] if len(argv) >= 1 else 'models/latest.pth'
+    model_paths = argv[0].split(':') if len(argv) >= 1 else ['models/latest.pth']
     num_games = int(argv[1]) if len(argv) >= 2 else 100
     num_process = int(argv[2]) if len(argv) >= 3 else 1
 
-    agent1 = build_agent(model_path, env)
-    if agent1 is None:
-        model = load_model(model_path, env.net())
-        agent1 = Agent(model)
+    def resolve_agent(model_path):
+        agent = build_agent(model_path, env)
+        if agent is None:
+            model = load_model(model_path, env.net())
+            agent = Agent(model)
+        return agent
+
+    main_agent = resolve_agent(model_paths[0])
     critic = None
 
     print('%d process, %d games' % (num_process, num_games))
@@ -393,7 +399,8 @@ def eval_main(args, argv):
 
     seed = random.randrange(1e8)
     print('seed = %d' % seed)
 
-    agents = [agent1] + [RandomAgent() for _ in range(len(env.players()) - 1)]
+    opponent = model_paths[1] if len(model_paths) > 1 else 'random'
+    agents = [main_agent] + [resolve_agent(opponent) for _ in range(len(env.players()) - 1)]
 
     evaluate_mp(env, agents, critic, env_args, {'default': {}}, num_process, num_games, seed)
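
Editorial usage note (a sketch; the command form assumes HandyRL's standard
entry point and is not verified against this patch): eval_main now accepts
colon-separated model paths, e.g.

    python main.py --eval models/newer.pth:models/older.pth 100 4

so the first path is the evaluated agent and, if a second path is given, it
is resolved as the opponent; with a single path the opponent falls back to
'random' as before.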