diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index c5345c7fed235..dc153614fcd26 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -152,8 +152,7 @@ def __call__(self, var, block=None):
             out_dtype = var.dtype
             out_var = var
 
-        # Initialization Ops should be prepended and not appended
-        op = block._prepend_op(
+        op = block.append_op(
             type="fill_constant",
             outputs={"Out": out_var},
             attrs={
@@ -242,7 +241,6 @@ def __call__(self, var, block=None):
                                  ["uint16", "float16", "float32", "float64"],
                                  "uniform_random")
 
-        # Initialization Ops should be prepended and not appended
         if self._seed == 0:
             self._seed = block.program.random_seed
 
@@ -260,7 +258,7 @@ def __call__(self, var, block=None):
             out_dtype = var.dtype
             out_var = var
 
-        op = block._prepend_op(
+        op = block.append_op(
             type="uniform_random",
             inputs={},
             outputs={"Out": out_var},
@@ -334,7 +332,7 @@ def __call__(self, var, block=None):
         check_variable_and_dtype(var, "Out",
                                  ["uint16", "float16", "float32", "float64"],
                                  "guassian_random")
-        # Initialization Ops should be prepended and not appended
+
         if self._seed == 0:
             self._seed = block.program.random_seed
 
@@ -352,7 +350,7 @@ def __call__(self, var, block=None):
             out_dtype = var.dtype
             out_var = var
 
-        op = block._prepend_op(
+        op = block.append_op(
             type="gaussian_random",
             outputs={"Out": out_var},
             attrs={
@@ -418,7 +416,7 @@ def __call__(self, var, block=None):
 
         assert isinstance(var, framework.Variable)
         assert isinstance(block, framework.Block)
-        # Initialization Ops should be prepended and not appended
+
         if self._seed == 0:
             self._seed = block.program.random_seed
 
@@ -436,7 +434,7 @@ def __call__(self, var, block=None):
             out_dtype = var.dtype
             out_var = var
 
-        op = block._prepend_op(
+        op = block.append_op(
             type="truncated_gaussian_random",
             outputs={"Out": out_var},
             attrs={
@@ -557,7 +555,7 @@ def __call__(self, var, block=None):
 
         if self._uniform:
             limit = np.sqrt(6.0 / float(fan_in + fan_out))
-            op = block._prepend_op(
+            op = block.append_op(
                 type="uniform_random",
                 inputs={},
                 outputs={"Out": out_var},
@@ -572,7 +570,7 @@ def __call__(self, var, block=None):
 
         else:
             std = np.sqrt(2.0 / float(fan_in + fan_out))
-            op = block._prepend_op(
+            op = block.append_op(
                 type="gaussian_random",
                 outputs={"Out": out_var},
                 attrs={
@@ -688,7 +686,7 @@ def __call__(self, var, block=None):
 
         if self._uniform:
             limit = np.sqrt(6.0 / float(fan_in))
-            op = block._prepend_op(
+            op = block.append_op(
                 type="uniform_random",
                 inputs={},
                 outputs={"Out": out_var},
@@ -703,7 +701,7 @@ def __call__(self, var, block=None):
 
         else:
             std = np.sqrt(2.0 / float(fan_in))
-            op = block._prepend_op(
+            op = block.append_op(
                 type="gaussian_random",
                 outputs={"Out": out_var},
                 attrs={
@@ -920,7 +918,6 @@ def __call__(self, var, block=None):
             out_dtype = var.dtype
             np_value = self._value
 
-        # Initialization Ops should be prepended and not appended
         if out_dtype == VarDesc.VarType.FP32:
             value_name = "fp32_values"
             values = [float(v) for v in np_value.flat]
@@ -932,7 +929,7 @@ def __call__(self, var, block=None):
         if self._value.size > 1024 * 1024 * 1024:
             raise ValueError("The size of input is too big. Please consider "
                              "saving it to file and 'load_op' to load it")
-        op = block._prepend_op(
+        op = block.append_op(
            type='assign_value',
            outputs={'Out': out_var},
            attrs={
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py
index db97e86385ae4..dddb14eb78e8a 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py
@@ -247,7 +247,8 @@ def train_model(self, device, use_custom_op=False, use_pe=False):
         paddle.set_device(device)
 
         with paddle.static.scope_guard(paddle.static.Scope()):
-            with paddle.static.program_guard(paddle.static.Program()):
+            with paddle.static.program_guard(paddle.static.Program(),
+                                             paddle.static.Program()):
                 x = paddle.static.data(
                     shape=[None, self.in_dim], name='x', dtype='float32')
                 y = paddle.static.data(
diff --git a/python/paddle/fluid/tests/unittests/test_dgc_optimizer.py b/python/paddle/fluid/tests/unittests/test_dgc_optimizer.py
index d615f7cb7044e..f3878dfa2bc76 100644
--- a/python/paddle/fluid/tests/unittests/test_dgc_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_dgc_optimizer.py
@@ -119,8 +119,8 @@ def check_dgc_momentum_optimizer(self,
         init_ops_count = 5 if name == "momentum" else 9
         init_ops = init_program.global_block().ops
         self.assertEqual(len(init_ops), init_ops_count)
-        self.assertEqual(init_ops[0].type, "fill_constant")
-        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
+        self.assertEqual(init_ops[-1].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[-1].attr('value'), learning_rate)
 
         # check dgc op regularization coeff
         train_ops = program.global_block().ops
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py
index 4d1e936558abf..be5e87b9d344b 100755
--- a/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py
@@ -524,13 +524,14 @@ def test_sharding_with_pp(self):
         # check program
         startup_prog_op_types = [op.type for op in startup_prog_ops]
         main_prog_op_types = [op.type for op in main_prog_ops]
+        print(startup_prog_op_types)
         self.assertEqual(startup_prog_op_types, [
+            'fill_constant', 'uniform_random', 'fill_constant',
+            'uniform_random', 'fill_constant', 'fill_constant', 'fill_constant',
             'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant',
-            'fill_constant', 'fill_constant', 'fill_constant', 'uniform_random',
-            'fill_constant', 'uniform_random', 'fill_constant', 'c_gen_nccl_id',
-            'c_comm_init', 'fill_constant', 'c_allreduce_sum', 'c_gen_nccl_id',
-            'c_comm_init', 'fill_constant', 'c_allreduce_sum', 'c_gen_nccl_id',
-            'c_comm_init', 'c_gen_nccl_id', 'c_comm_init'
+            'c_gen_nccl_id', 'c_comm_init', 'fill_constant', 'c_allreduce_sum',
+            'c_gen_nccl_id', 'c_comm_init', 'fill_constant', 'c_allreduce_sum',
+            'c_gen_nccl_id', 'c_comm_init', 'c_gen_nccl_id', 'c_comm_init'
         ])
 
         self.assertEqual(main_prog_op_types, [
diff --git a/python/paddle/fluid/tests/unittests/test_initializer.py b/python/paddle/fluid/tests/unittests/test_initializer.py
index 3d1b08186384c..237ff0c958e39 100644
--- a/python/paddle/fluid/tests/unittests/test_initializer.py
+++ b/python/paddle/fluid/tests/unittests/test_initializer.py
@@ -137,9 +137,9 @@ def test_uniform_initializer_random_seed(self):
                 name="param2",
                 initializer=initializer.UniformInitializer(seed=456))
         init_op = block.ops[1]
-        self.assertEqual(init_op.attr("seed"), 123)
+        self.assertEqual(init_op.attr("seed"), 456)
         init_op1 = block.ops[0]
-        self.assertEqual(init_op1.attr("seed"), 456)
+        self.assertEqual(init_op1.attr("seed"), 123)
 
     def test_uniform_initializer(self, dtype="float32"):
         """Test uniform initializer with supplied attributes
@@ -594,12 +594,12 @@ def test_set_global_weight_initilizer(self):
         block = startup_prog.global_block()
         self.assertEqual(len(block.ops), 2)
 
-        # init bias is the first op, and weight is the second
-        bias_init_op = block.ops[0]
+        # init weight is the first op, and bias is the second
+        bias_init_op = block.ops[1]
         self.assertEqual(bias_init_op.type, 'fill_constant')
         self.assertAlmostEqual(bias_init_op.attr('value'), 0.0, delta=DELTA)
 
-        param_init_op = block.ops[1]
+        param_init_op = block.ops[0]
         self.assertEqual(param_init_op.type, 'uniform_random')
         self.assertAlmostEqual(param_init_op.attr('min'), -0.5, delta=DELTA)
         self.assertAlmostEqual(param_init_op.attr('max'), 0.5, delta=DELTA)
@@ -624,14 +624,14 @@ def test_set_global_bias_initilizer(self):
         block = startup_prog.global_block()
         self.assertEqual(len(block.ops), 2)
 
-        # init bias is the first op, and weight is the second
-        bias_init_op = block.ops[0]
+        # init weight is the first op, and bias is the second
+        bias_init_op = block.ops[1]
         self.assertEqual(bias_init_op.type, 'gaussian_random')
         self.assertAlmostEqual(bias_init_op.attr('mean'), 0.0, delta=DELTA)
         self.assertAlmostEqual(bias_init_op.attr('std'), 2.0, delta=DELTA)
         self.assertEqual(bias_init_op.attr('seed'), 0)
 
-        param_init_op = block.ops[1]
+        param_init_op = block.ops[0]
         self.assertEqual(param_init_op.type, 'uniform_random')
         self.assertAlmostEqual(param_init_op.attr('min'), -0.5, delta=DELTA)
         self.assertAlmostEqual(param_init_op.attr('max'), 0.5, delta=DELTA)
@@ -665,5 +665,49 @@ def test_uniform_initializer(self, dtype="float32"):
         paddle.enable_static()
 
 
+class TesetconsistencyOfDynamicAndStaticGraph(unittest.TestCase):
+    def test_order(self):
+        paddle.set_device('cpu')
+        SEED = 123
+        weight_attr = paddle.framework.ParamAttr(
+            name="linear_weight",
+            learning_rate=1.0,
+            trainable=False,
+            regularizer=None,
+            initializer=paddle.nn.initializer.TruncatedNormal(
+                mean=0.0, std=2.0))
+        bias_attr = paddle.framework.ParamAttr(
+            name="linear_bias",
+            learning_rate=1.0,
+            trainable=False,
+            regularizer=None,
+            initializer=paddle.nn.initializer.TruncatedNormal(
+                mean=0.0, std=2.0))
+
+        def run_dynamic_graph():
+            paddle.disable_static()
+            paddle.seed(SEED)
+            linear = paddle.nn.Linear(
+                1, 1, weight_attr=weight_attr, bias_attr=bias_attr)
+            return linear.weight.numpy(), linear.bias.numpy()
+        paddle.enable_static()
+
+        def run_static_graph():
+            paddle.enable_static()
+            exe = paddle.static.Executor(paddle.CPUPlace())
+            paddle.seed(SEED)
+            linear = paddle.nn.Linear(
+                1, 1, weight_attr=weight_attr, bias_attr=bias_attr)
+            res = exe.run(paddle.static.default_startup_program(),
+                          fetch_list=['linear_weight', 'linear_bias'])
+            return res[0], res[1]
+
+        dynamic_res = run_dynamic_graph()
+        static_res = run_static_graph()
+
+        self.assertTrue(np.array_equal(dynamic_res[0], static_res[0]))
+        self.assertTrue(np.array_equal(dynamic_res[1], static_res[1]))
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py
index ffecec1815b15..31704ebcd9192 100644
--- a/python/paddle/fluid/tests/unittests/test_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_optimizer.py
@@ -164,10 +164,10 @@ def test_vanilla_momentum_optimizer(self):
         # Check init_program
         init_ops = init_program.global_block().ops
         self.assertEqual(len(init_ops), 2)
-        self.assertEqual(init_ops[0].type, "fill_constant")
-        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
         self.assertEqual(init_ops[1].type, "fill_constant")
-        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
+        self.assertAlmostEqual(init_ops[1].attr('value'), learning_rate)
+        self.assertEqual(init_ops[0].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[0].attr('value'), 0.0)
 
     def test_nesterov_momentum_optimizer(self):
         init_program = framework.Program()
@@ -217,10 +217,10 @@ def test_nesterov_momentum_optimizer(self):
         # Check init_program
         init_ops = init_program.global_block().ops
         self.assertEqual(len(init_ops), 2)
-        self.assertEqual(init_ops[0].type, "fill_constant")
-        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
         self.assertEqual(init_ops[1].type, "fill_constant")
-        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
+        self.assertAlmostEqual(init_ops[1].attr('value'), learning_rate)
+        self.assertEqual(init_ops[0].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[0].attr('value'), 0.0)
 
 
 class TestAdagradOptimizer(unittest.TestCase):
@@ -277,10 +277,10 @@ def test_adagrad_optimizer(self):
         # Check init_program
         init_ops = init_program.global_block().ops
         self.assertEqual(len(init_ops), 2)
-        self.assertEqual(init_ops[0].type, "fill_constant")
-        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
         self.assertEqual(init_ops[1].type, "fill_constant")
-        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
+        self.assertAlmostEqual(init_ops[1].attr('value'), learning_rate)
+        self.assertEqual(init_ops[0].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[0].attr('value'), 0.0)
 
 
 class TestAdamOptimizer(unittest.TestCase):
@@ -344,8 +344,8 @@ def test_adam_optimizer(self):
         # Check init_program
         init_ops = init_program.global_block().ops
         self.assertEqual(len(init_ops), 5)
-        self.assertEqual(init_ops[0].type, "fill_constant")
-        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
+        self.assertEqual(init_ops[-1].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[-1].attr('value'), learning_rate)
 
 
 class TestAdamaxOptimizer(unittest.TestCase):
@@ -409,8 +409,8 @@ def test_adamax_optimizer(self):
         # Check init_program
         init_ops = init_program.global_block().ops
         self.assertEqual(len(init_ops), 4)
-        self.assertEqual(init_ops[0].type, "fill_constant")
-        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
+        self.assertEqual(init_ops[-1].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[-1].attr('value'), learning_rate)
 
 
 class TestDpsgdOptimizer(unittest.TestCase):
@@ -509,10 +509,10 @@ def test_decayed_adagrad_optimizer(self):
         # Check init_program
         init_ops = init_program.global_block().ops
         self.assertEqual(len(init_ops), 2)
-        self.assertEqual(init_ops[0].type, "fill_constant")
-        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
         self.assertEqual(init_ops[1].type, "fill_constant")
-        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
+        self.assertAlmostEqual(init_ops[1].attr('value'), learning_rate)
+        self.assertEqual(init_ops[0].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[0].attr('value'), 0.0)
 
 
 class TestFtrlOptimizer(unittest.TestCase):
@@ -576,8 +576,8 @@ def test_ftrl_optimizer(self):
         # Check init_program
         init_ops = init_program.global_block().ops
         self.assertEqual(len(init_ops), 3)
-        self.assertEqual(init_ops[0].type, "fill_constant")
-        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
+        self.assertEqual(init_ops[-1].type, "fill_constant")
+        self.assertAlmostEqual(init_ops[-1].attr('value'), learning_rate)
 
 
 class TestLookaheadOptimizer(unittest.TestCase):
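
For context, a minimal sketch of what the change above means in practice (not part of the patch; it assumes the Paddle 2.x static-graph API, and the layer sizes and attribute values are illustrative only). With initializer ops appended to the startup block instead of prepended, init ops now appear in parameter-creation order, which is why the tests above switch from ops[0] to ops[-1] and swap their index expectations.

# sketch only: shows startup-op ordering after this patch
import paddle

paddle.enable_static()
linear = paddle.nn.Linear(
    4,
    4,
    weight_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.Uniform(-0.5, 0.5)),
    bias_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.Constant(0.0)))

startup_ops = paddle.static.default_startup_program().global_block().ops
# With append (this patch), the weight init comes first, then the bias init:
#   ['uniform_random', 'fill_constant']
# With the old prepend behavior the order was reversed:
#   ['fill_constant', 'uniform_random']
print([op.type for op in startup_ops])

This is the same ordering the updated assertions in test_initializer.py and test_optimizer.py now expect: the learning-rate and bias fill_constant ops land after the parameter init ops rather than in front of them.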