diff --git a/examples/development/main.py b/examples/development/main.py
index 8510769d2..b70b0cefa 100644
--- a/examples/development/main.py
+++ b/examples/development/main.py
@@ -46,6 +46,17 @@ def _build(self):
             get_environment_from_params(environment_params['evaluation'])
             if 'evaluation' in environment_params
             else training_environment)
+
+        seed = variant['run_params']['seed']
+
+        training_environment.seed(seed)
+
+        # Seed the evaluation env differently so that evaluation checks the
+        # policy is not just memorizing action sequences for seen initial
+        # states. Without an 'evaluation' entry the evaluation env *is* the
+        # training env, so skip it rather than overwrite the training seed.
+        if evaluation_environment is not training_environment:
+            evaluation_environment.seed(seed + 10)
 
         replay_pool = self.replay_pool = (
             get_replay_pool_from_variant(variant, training_environment))